src/llvm-project/llvm/utils/mlgo-utils/mlgo/corpus/combine_training_corpus.py - toolchain/rustc - Git at Google

 # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 # See https://llvm.org/LICENSE.txt for license information.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 r"""Combine multiple training corpus into a single training corpus.

 Currently only supports the case where multiple corpora share the same
 configurables except for the "modules" field.

 Usage: we'd like to combine training corpus corpus1 and corpus2 into
 combinedcorpus; we first structure the files as follows:

 combinedcorpus
 combinedcorpus/corpus1
 combinedcorpus/corpus2

 Running this script with

 python3 \
 compiler_opt/tools/combine_training_corpus.py \
   --root_dir=$PATH_TO_combinedcorpus

 generates combinedcorpus/corpus_description.json file. In this way corpus1
 and corpus2 are combined into combinedcorpus.
 """

 import argparse

 from mlgo.corpus import combine_training_corpus_lib


 def parse_args_and_run():
     """Parse the command line and hand the parsed arguments to ``main``.

     ``--root_dir`` is declared as required. Left optional, a run without it
     would produce ``args.root_dir is None`` and pass that value on unchecked;
     with ``required=True`` argparse instead prints a usage message and exits
     with status 2 before ``main`` is reached.
     """
     parser = argparse.ArgumentParser(
         description="A tool for combining multiple training corpora"
     )
     parser.add_argument(
         "--root_dir",
         type=str,
         required=True,
         help="The root dir of module paths to combine.",
     )
     args = parser.parse_args()
     main(args)


 def main(args):
     """Combine the training corpora located under ``args.root_dir``.

     Args:
         args: Parsed command-line namespace; only its ``root_dir`` attribute
             is read. The actual work is delegated to
             ``combine_training_corpus_lib.combine_corpus``.
     """
     corpus_root = args.root_dir
     combine_training_corpus_lib.combine_corpus(corpus_root)


 # Run the command-line entry point only when this file is executed as a
 # script; importing the module must not trigger argument parsing.
 if __name__ == "__main__":
     parse_args_and_run()
	# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	# See https://llvm.org/LICENSE.txt for license information.
	# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	r"""Combine multiple training corpus into a single training corpus.

	Currently only supports the case where multiple corpora share the same
	configurables except for the "modules" field.

	Usage: we'd like to combine training corpus corpus1 and corpus2 into
	combinedcorpus; we first structure the files as follows:

	combinedcorpus
	combinedcorpus/corpus1
	combinedcorpus/corpus2

	Running this script with

	python3 \
	compiler_opt/tools/combine_training_corpus.py \
	--root_dir=$PATH_TO_combinedcorpus

	generates combinedcorpus/corpus_description.json file. In this way corpus1
	and corpus2 are combined into combinedcorpus.
	"""

	import argparse

	from mlgo.corpus import combine_training_corpus_lib


	def parse_args_and_run():
	    """Parse the command line and hand the parsed arguments to ``main``.

	    ``--root_dir`` is declared as required. Left optional, a run without it
	    would produce ``args.root_dir is None`` and pass that value on unchecked;
	    with ``required=True`` argparse instead prints a usage message and exits
	    with status 2 before ``main`` is reached.
	    """
	    parser = argparse.ArgumentParser(
	        description="A tool for combining multiple training corpora"
	    )
	    parser.add_argument(
	        "--root_dir",
	        type=str,
	        required=True,
	        help="The root dir of module paths to combine.",
	    )
	    args = parser.parse_args()
	    main(args)


	def main(args):
	    """Combine the training corpora located under ``args.root_dir``.

	    Args:
	        args: Parsed command-line namespace; only its ``root_dir`` attribute
	            is read. The actual work is delegated to
	            ``combine_training_corpus_lib.combine_corpus``.
	    """
	    combine_training_corpus_lib.combine_corpus(args.root_dir)


	# Run the command-line entry point only when this file is executed as a
	# script; importing the module must not trigger argument parsing.
	if __name__ == "__main__":
	    parse_args_and_run()