@@ -13,6 +13,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# Script to extract build metadata from bazel BUILD files.
+# To avoid having two sources of truth for the build metadata (build
+# targets, source files, header files etc.), this script analyzes the contents
+# of bazel BUILD files and generates a YAML file (currently called
+# build_autogenerated.yaml). The format and semantics of the generated YAML file
+# are chosen to match the format of a "build.yaml" file, which used
+# to be the source of truth for the gRPC build before bazel became
+# the primary build system.
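+#
+# For orientation only: an entry in the generated file looks roughly like the
+# sketch below (hypothetical target name; the field names are the ones this
+# script works with, e.g. "name", "build", "src", "headers" and "deps"):
+#
+#   targets:
+#   - name: some_test
+#     build: test
+#     headers: [...]
+#     src: [...]
+#     deps: [...]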
+
 import subprocess
 import yaml
 import xml.etree.ElementTree as ET
@@ -32,6 +41,7 @@ def _bazel_query_xml_tree(query):
 
 
 def _rule_dict_from_xml_node(rule_xml_node):
+    """Converts XML node representing a rule (obtained from "bazel query --output xml") to a dictionary that contains all the metadata we will need."""
     result = {
         'class': rule_xml_node.attrib.get('class'),
         'name': rule_xml_node.attrib.get('name'),
@@ -63,6 +73,7 @@ def _rule_dict_from_xml_node(rule_xml_node):
 
 
 def _extract_rules_from_bazel_xml(xml_tree):
+    """Extract bazel rules from an XML tree node obtained from "bazel query --output xml" command."""
     result = {}
     for child in xml_tree:
         if child.tag == 'rule':
@@ -133,8 +144,13 @@ def _extract_deps(bazel_rule):
 
 
 def _create_target_from_bazel_rule(target_name, bazel_rules):
-    # extract the deps from bazel
+    """Create build.yaml-like target definition from bazel metadata"""
     bazel_rule = bazel_rules[_get_bazel_label(target_name)]
+
+    # Create a template for our target from the bazel rule. Initially we only
+    # populate some "private" fields with the original info we got from bazel
+    # and only later we will populate the public fields (once we do some extra
+    # postprocessing).
     result = {
         'name': target_name,
         '_PUBLIC_HEADERS_BAZEL': _extract_public_headers(bazel_rule),
@@ -312,22 +328,33 @@ def _expand_intermediate_deps(target_dict, public_dep_names, bazel_rules):
 
 
 def _generate_build_metadata(build_extra_metadata, bazel_rules):
+    """Generate build metadata in build.yaml-like format from bazel build metadata and build.yaml-specific "extra metadata"."""
     lib_names = build_extra_metadata.keys()
     result = {}
 
     for lib_name in lib_names:
         lib_dict = _create_target_from_bazel_rule(lib_name, bazel_rules)
 
+        # Figure out the final list of headers and sources for the given target.
+        # While this is mostly based on bazel build metadata, build.yaml does
+        # not necessarily expose all the targets that are present in the bazel build.
+        # These "intermediate dependencies" might get flattened.
+        # TODO(jtattermusch): This is done to avoid introducing too many intermediate
+        # libraries into the build.yaml-based builds (which might in turn cause issues
+        # building language-specific artifacts). The need for elision (and expansion)
+        # of intermediate libraries can be re-evaluated in the future.
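+        # For illustration (hypothetical names): if "foo" depends on an
+        # intermediate library "foo_internal" that build.yaml does not expose,
+        # the sources and headers of "foo_internal" get merged into "foo" and
+        # "foo" then depends directly on "foo_internal"'s public dependencies.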
         _expand_intermediate_deps(lib_dict, lib_names, bazel_rules)
 
-        # populate extra properties from build metadata
+        # populate extra properties from the build.yaml-specific "extra metadata"
         lib_dict.update(build_extra_metadata.get(lib_name, {}))
 
         # store to results
         result[lib_name] = lib_dict
 
-    # rename some targets to something else
-    # this needs to be made after we're done with most of processing logic
+    # Rename targets marked with "_RENAME" extra metadata.
+    # This is mostly a cosmetic change to ensure that we end up with the build.yaml
+    # target names we're used to from the past (and also to avoid overly long names).
+    # The rename step needs to happen after we're done with most of the processing logic,
     # otherwise the already-renamed libraries will have different names than expected
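+    # For illustration (hypothetical names): an extra metadata entry such as
+    #   'some_library_internal': {..., '_RENAME': 'some_library'}
+    # makes the target show up as "some_library" in the generated output.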
     for lib_name in lib_names:
         to_name = build_extra_metadata.get(lib_name, {}).get('_RENAME', None)
@@ -410,8 +437,8 @@ def _extract_cc_tests(bazel_rules):
     return list(sorted(result))
 
 
-def _filter_cc_tests(tests):
-    """Filters out tests that we don't want or we cannot build them reasonably"""
+def _exclude_unwanted_cc_tests(tests):
+    """Filters out bazel tests that we don't want to run with other build systems or cannot reasonably build with them."""
 
     # most qps tests are autogenerated, we are fine without them
     tests = list(
@@ -472,6 +499,7 @@ def _filter_cc_tests(tests):
 
 
 def _generate_build_extra_metadata_for_tests(tests, bazel_rules):
+    """For the given tests, generate the "extra metadata" that we need for our "build.yaml"-like output. The extra metadata is generated from the bazel rule metadata using a bunch of heuristics."""
     test_metadata = {}
     for test in tests:
         test_dict = {'build': 'test', '_TYPE': 'target'}
@@ -567,6 +595,16 @@ def _generate_build_extra_metadata_for_tests(tests, bazel_rules):
     return test_metadata
 
 
+def _detect_and_print_issues(build_yaml_like):
+    """Try detecting some unusual situations and warn about them."""
+    for tgt in build_yaml_like['targets']:
+        if tgt['build'] == 'test':
+            for src in tgt['src']:
+                if src.startswith('src/') and not src.endswith('.proto'):
+                    print('source file from under "src/" tree used in test ' +
+                          tgt['name'] + ': ' + src)
+
+
 # extra metadata that will be used to construct build.yaml
 # there are mostly extra properties that we weren't able to obtain from the bazel build
 # _TYPE: whether this is library, target or test
@@ -937,31 +975,86 @@ _BAZEL_DEPS_QUERIES = [
     'deps("//src/proto/...")',
 ]
 
+# Step 1: run a bunch of "bazel query --output xml" queries to collect
+# the raw build metadata from the bazel build.
+# At the end of this step we will have a dictionary of bazel rules
+# that are interesting to us (libraries, binaries, etc.) along
+# with their most important metadata (sources, headers, dependencies)
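+#
+# Roughly speaking (the exact command line is assembled by
+# _bazel_query_xml_tree), each query corresponds to an invocation like:
+#   bazel query --output xml 'deps("//src/proto/...")'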
 bazel_rules = {}
 for query in _BAZEL_DEPS_QUERIES:
     bazel_rules.update(
         _extract_rules_from_bazel_xml(_bazel_query_xml_tree(query)))
 
+# Step 1a: Knowing the transitive closure of dependencies will make
+# the postprocessing simpler, so compute the info for all our rules.
 _populate_transitive_deps(bazel_rules)
 
-tests = _filter_cc_tests(_extract_cc_tests(bazel_rules))
-test_metadata = _generate_build_extra_metadata_for_tests(tests, bazel_rules)
-
-all_metadata = {}
-all_metadata.update(_BUILD_EXTRA_METADATA)
-all_metadata.update(test_metadata)
-
-all_targets_dict = _generate_build_metadata(all_metadata, bazel_rules)
+# Step 2: Extract the known bazel cc_test tests. While most tests
+# will be buildable with other build systems just fine, some of these tests
+# would be too difficult to build and run with other build systems,
+# so we simply skip the ones we don't want.
+tests = _exclude_unwanted_cc_tests(_extract_cc_tests(bazel_rules))
+
+# Step 3: Generate the "extra metadata" for all our build targets.
+# While the bazel rules give us most of the information we need,
+# the legacy "build.yaml" format requires some additional fields that
+# we cannot get just from bazel alone (we call that "extra metadata").
+# In this step, we basically analyze the build metadata we have from bazel
+# and use heuristics to determine (and sometimes guess) the right
+# extra metadata to use for each target.
+#
+# - For some targets (such as the public libraries, helper libraries
+#   and executables) determining the right extra metadata is hard to do
+#   automatically. For these targets, the extra metadata is supplied "manually"
+#   in the form of the _BUILD_EXTRA_METADATA dictionary. That allows us to match
+#   the semantics of the legacy "build.yaml" as closely as possible.
+#
+# - For test binaries, it is possible to generate the "extra metadata" mostly
+#   automatically using a rule-based heuristic approach because most tests
+#   look and behave alike from the build's perspective.
+#
+# TODO(jtattermusch): Of course neither "_BUILD_EXTRA_METADATA" nor
+# the heuristic approach used for tests is ideal and they cannot be made
+# to cover all possible situations (and are tailored to work with the way
+# the grpc build currently works), but the idea was to start with something
+# reasonably simple that matches the "build.yaml"-like semantics as closely
+# as possible (to avoid changing too many things at once) and gradually get
+# rid of the legacy "build.yaml"-specific fields one by one. Once that is done,
+# only very little "extra metadata" would be needed and/or it would be trivial
+# to generate it automatically.
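+#
+# For orientation only (hypothetical entry, not taken verbatim from
+# _BUILD_EXTRA_METADATA), a manually supplied record looks roughly like:
+#   'some_library': {'language': 'c++', 'build': 'all', '_TYPE': 'library'}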
+all_extra_metadata = {}
+all_extra_metadata.update(_BUILD_EXTRA_METADATA)
+all_extra_metadata.update(
+    _generate_build_extra_metadata_for_tests(tests, bazel_rules))
+
+# Step 4: Generate the final metadata for all the targets.
+# This is done by combining the bazel build metadata and the "extra metadata"
+# we obtained in the previous step.
+# In this step, we also perform some interesting massaging of the target metadata
+# to end up with a result that is as similar to the legacy build.yaml data
+# as possible.
+# - Some targets get renamed (to match the legacy build.yaml target names)
+# - Some intermediate libraries get elided ("expanded") to better match the set
+#   of targets provided by the legacy build.yaml build
+all_targets_dict = _generate_build_metadata(all_extra_metadata, bazel_rules)
+
+# Step 5: convert the dictionary with all the targets to a dict that has
+# the desired "build.yaml"-like layout.
+# TODO(jtattermusch): We use the custom "build.yaml"-like layout because
+# currently all other build systems use that format as their source of truth.
+# In the future, we can get rid of this custom & legacy format entirely,
+# but we would need to update the generators for other build systems
+# at the same time.
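+#
+# For orientation only: the result groups the targets into "build.yaml"-style
+# top-level sections (e.g. a "targets" list whose entries carry fields such as
+# "name", "build" and "src", as used by _detect_and_print_issues below); the
+# exact layout is determined by _convert_to_build_yaml_like.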
 build_yaml_like = _convert_to_build_yaml_like(all_targets_dict)
 
-# if a test uses source files from src/ directly, it's a little bit suspicious
-for tgt in build_yaml_like['targets']:
-    if tgt['build'] == 'test':
-        for src in tgt['src']:
-            if src.startswith('src/') and not src.endswith('.proto'):
-                print('source file from under "src/" tree used in test ' +
-                      tgt['name'] + ': ' + src)
+# detect and report some suspicious situations we've seen before
+_detect_and_print_issues(build_yaml_like)
 
+# Step 6: Store the build_autogenerated.yaml in a deterministic (=sorted)
+# and cleaned-up form.
+# TODO(jtattermusch): The "cleanup" function is taken from the legacy
+# build system (which used build.yaml) and can be eventually removed.
 build_yaml_string = build_cleaner.cleaned_build_yaml_dict_as_string(
     build_yaml_like)
 with open('build_autogenerated.yaml', 'w') as file: