| """Generates cubin headers for TF dialect ops.""" |
| |
| load("@local_config_cuda//cuda:build_defs.bzl", "cuda_gpu_architectures") |
| load( |
| "@local_config_rocm//rocm:build_defs.bzl", |
| "rocm_gpu_architectures", |
| ) |
| load("//tensorflow:tensorflow.bzl", "get_compatible_with_cloud") |
| load( |
| "//tensorflow/stream_executor:build_defs.bzl", |
| "if_gpu_is_configured", |
| ) |
| load("@bazel_tools//tools/cpp:toolchain_utils.bzl", "find_cpp_toolchain") |
| |
def if_mlir_generated_gpu_kernels_enabled(if_true, if_false = []):
    """Picks a value based on whether MLIR-generated GPU kernels are disabled.

    Args:
      if_true: Value used under the default condition, i.e. when the
        `mlir_generated_gpu_kernels_disabled` setting does not match.
      if_false: Value used when the `mlir_generated_gpu_kernels_disabled`
        setting matches.

    Returns:
      A `select()` over the two values.
    """
    disabled_setting = "//tensorflow/core/kernels/mlir_generated:mlir_generated_gpu_kernels_disabled"
    branches = {
        disabled_setting: if_false,
        "//conditions:default": if_true,
    }
    return select(branches)
| |
def _lookup_file(filegroup, path):
    """Finds the file in `filegroup` whose path ends with `path`.

    A file also matches when its path ends with `path` followed by ".exe".

    Args:
      filegroup: Target whose `files` depset is searched.
      path: Path suffix to look for.

    Returns:
      The first matching file, or None if no file matches.
    """
    exe_path = path + ".exe"
    for candidate in filegroup.files.to_list():
        candidate_path = candidate.path
        if candidate_path.endswith(path):
            return candidate
        if candidate_path.endswith(exe_path):
            return candidate
    return None
| |
# Provider with a single field, `gpu_bins`.
GpuBinaryInfo = provider(
    doc = "GPU binaries in either cubin format or hsaco format",
    fields = ["gpu_bins"],
)
| |
# Short type names whose MLIR spelling differs from the short name itself.
type_to_mlir = dict(
    c64 = "complex<f32>",
    c128 = "complex<f64>",
)
| |
def _get_mlir_type(type):
    """Translates a short type name into its MLIR spelling.

    Args:
      type: Short type name, e.g. "f32" or "c64".

    Returns:
      The entry of `type_to_mlir` for `type` if there is one, otherwise
      `type` unchanged.
    """
    return type_to_mlir.get(type, type)
| |
def _gen_mlir_op_impl(ctx):
    """Expands an MLIR op template into a concrete .mlir file.

    Runs `sed` over the template and writes the result to `ctx.outputs.out`.
    The following textual substitutions are applied, in this order:

      platform      -> upper-cased `platform` attribute
      _elem_type    -> "_" + `type` attribute
      elem_type     -> MLIR spelling of the `type` attribute
      _output_type  -> "_" + `output_type` attribute
      output_type   -> MLIR spelling of the `output_type` attribute

    Args:
      ctx: Rule context.
    """
    mlir_type = _get_mlir_type(ctx.attr.type)
    mlir_output_type = _get_mlir_type(ctx.attr.output_type)

    # Order matters: the underscore-prefixed patterns must be rewritten before
    # the bare ones, otherwise e.g. "_elem_type" would already have been
    # consumed by the "elem_type" substitution.
    substitutions = [
        ("platform", ctx.attr.platform.upper()),
        ("_elem_type", "_" + ctx.attr.type),
        ("elem_type", mlir_type),
        ("_output_type", "_" + ctx.attr.output_type),
        ("output_type", mlir_output_type),
    ]
    sed_expressions = " ".join([
        "-e 's/%s/%s/g'" % (pattern, replacement)
        for (pattern, replacement) in substitutions
    ])

    # The input and output paths are handed over as positional arguments
    # ($1, $2) and quoted in the command, so that paths containing spaces or
    # shell metacharacters cannot break the command line. A single sed
    # invocation applies all expressions sequentially to each line.
    ctx.actions.run_shell(
        inputs = [ctx.file.template],
        outputs = [ctx.outputs.out],
        arguments = [ctx.file.template.path, ctx.outputs.out.path],
        command = "sed %s \"$1\" > \"$2\"" % sed_expressions,
    )
| |
# Rule that instantiates an MLIR op template for one platform and one
# (type, output_type) combination; see _gen_mlir_op_impl.
_gen_mlir_op_rule = rule(
    attrs = {
        # Template file to expand.
        "template": attr.label(mandatory = True, allow_single_file = True),
        # Short name of the element type, e.g. "f32".
        "type": attr.string(mandatory = True),
        # Short name of the output type.
        "output_type": attr.string(mandatory = True),
        # Platform name; it is upper-cased before being substituted.
        "platform": attr.string(mandatory = True),
        # The generated .mlir file.
        "out": attr.output(mandatory = True),
    },
    output_to_genfiles = True,
    implementation = _gen_mlir_op_impl,
)
| |
def _gen_mlir_op(name, type, platform, output_type):
    """Declares the target that generates the .mlir file for one op variant.

    The template is read from `op_definitions/<name>.mlir.tmpl` and the result
    is written to `<name>_<platform>_<type>_<output_type>.mlir`.

    Args:
      name: Name of the op.
      type: Short name of the element type.
      platform: Platform to generate for.
      output_type: Short name of the output type.
    """
    variant = dict(
        name = name,
        platform = platform,
        type = type,
        output_type = output_type,
    )
    target_name = "generate_{name}_{platform}_{type}_{output_type}_mlir".format(**variant)
    output_file = "{name}_{platform}_{type}_{output_type}.mlir".format(**variant)
    template_file = "op_definitions/{name}.mlir.tmpl".format(name = name)

    _gen_mlir_op_rule(
        name = target_name,
        template = template_file,
        platform = platform,
        type = type,
        output_type = output_type,
        out = output_file,
    )
| |
| ################################################################################ |
| # Kernels build rules. |
| ################################################################################ |
| |
def if_mlir_experimental_kernels_enabled(if_true, if_false = []):
    """Picks a value based on whether experimental MLIR kernels are enabled.

    Args:
      if_true: Value used when the `mlir_experimental_kernels_enabled`
        setting matches.
      if_false: Value used under the default condition.

    Returns:
      A `select()` over the two values.
    """
    enabled_setting = "//tensorflow/core/kernels/mlir_generated:mlir_experimental_kernels_enabled"
    branches = {
        enabled_setting: if_true,
        "//conditions:default": if_false,
    }
    return select(branches)
| |
def _gen_kernel_fatbin_impl(ctx):
    """Compiles an MLIR op into a kernel object file and exposes it for linking.

    Runs the `_tool` executable on the `mlir_op` file to produce the
    `kernel` output, then wraps that object file in a linking context.

    Args:
      ctx: Rule context.

    Returns:
      A list with a single `CcInfo` whose linking context contains the
      generated object file.
    """
    cc_toolchain = find_cpp_toolchain(ctx)
    feature_configuration = cc_common.configure_features(
        ctx = ctx,
        cc_toolchain = cc_toolchain,
        requested_features = ctx.features,
        unsupported_features = ctx.disabled_features,
    )

    # Optional flags come first, followed by the flags that are always passed.
    cmd_args = []
    if ctx.attr.unroll_factors:
        cmd_args.append("--unroll_factors=%s" % ctx.attr.unroll_factors)
    if ctx.attr.extra_args:
        cmd_args.extend(ctx.attr.extra_args)

    # The attribute uses "x" as separator (e.g. "16x16"); the tool is given a
    # comma-separated list.
    tile_sizes = ctx.attr.tile_size.replace("x", ",")
    arch_flag = ",".join(ctx.attr.gpu_archs)
    gpu_bin = ctx.outputs.kernel

    # cc_binary seems not to bring its dependencies with it, so do that explicitly here.
    ctx.actions.run(
        inputs = [ctx.file.mlir_op, ctx.file._tfso],
        outputs = [gpu_bin],
        executable = ctx.executable._tool,
        arguments = cmd_args + [
            "--tile_sizes=%s" % tile_sizes,
            "--arch=%s" % arch_flag,
            "--input=%s" % ctx.file.mlir_op.path,
            "--output=%s" % gpu_bin.path,
            # Evaluates to "True" only for the f32 data type.
            "--enable_ftz=%s" % (ctx.attr.data_type == "f32"),
        ],
        mnemonic = "compile",
    )
    compilation_outputs = cc_common.create_compilation_outputs(
        # We always produce PIC object files, so use the same object files for both.
        objects = depset([gpu_bin]),
        pic_objects = depset([gpu_bin]),
    )

    # Only the linking context is needed; the linking outputs are discarded.
    (linking_context, _linking_outputs) = cc_common.create_linking_context_from_compilation_outputs(
        name = ctx.label.name,
        actions = ctx.actions,
        feature_configuration = feature_configuration,
        cc_toolchain = cc_toolchain,
        compilation_outputs = compilation_outputs,
    )
    return [CcInfo(linking_context = linking_context)]
| |
# Rule that compiles one generated .mlir op into a kernel object file
# ("<name>_kernel.o") and provides it through CcInfo; see
# _gen_kernel_fatbin_impl.
_gen_kernel_fatbin_rule = rule(
    attrs = {
        # The .mlir file to compile.
        "mlir_op": attr.label(mandatory = True, allow_single_file = True),
        # Short name of the element type, e.g. "f32".
        "data_type": attr.string(mandatory = True),
        # Tiling specification with "x" as separator, e.g. "16x16".
        "tile_size": attr.string(mandatory = True),
        # Optional unrolling specification, forwarded as --unroll_factors.
        "unroll_factors": attr.string(),
        # GPU architectures, joined with "," and forwarded as --arch.
        "gpu_archs": attr.string_list(mandatory = True),
        # Additional arguments passed to the tool unchanged.
        "extra_args": attr.string_list(),
        # cc_binary seems not to bring its dependencies with it, so do that explicitly here.
        "_tfso": attr.label(
            default = Label("//tensorflow:libtensorflow_framework.so.2"),
            cfg = "host",
            allow_single_file = True,
        ),
        "_tool": attr.label(
            executable = True,
            default = Label("//tensorflow/compiler/mlir/tools/kernel_gen:tf_to_kernel"),
            cfg = "host",
        ),
        "_cc_toolchain": attr.label(default = "@bazel_tools//tools/cpp:current_cc_toolchain"),
    },
    fragments = ["cpp"],
    outputs = {"kernel": "%{name}_kernel.o"},
    # find_cpp_toolchain() requires the C++ toolchain type to be declared on
    # the rule when C++ toolchain resolution is enabled.
    toolchains = ["@bazel_tools//tools/cpp:toolchain_type"],
    implementation = _gen_kernel_fatbin_impl,
)
| |
def gen_kernel_library(
        name,
        types,
        tile_size,
        output_types = None,
        tags = [],
        platform = "gpu",
        unroll_factors = None,
        extra_args = []):
    """Generates a library with kernels for a specific tensorflow op.

    For every (type, output_type) pair this declares, when at least one CUDA
    or ROCm architecture is configured, a target that generates the .mlir
    file, a target that compiles it into a kernel object file, and an sh_test
    that runs `build_test.sh` on the generated file. A `<name>_kernels`
    cc_library depending on all kernel targets is always declared.

    Args:
      name: The name of the tensorflow op.
      types: The types ("f16", "f32", "f64") for which a kernel should be generated.
      tile_size: The tiling specification, e.g. "16x16".
      output_types: The output types for which a kernel should be generated. If
                    specified, the i-th entry in types corresponds to the i-th
                    entry in output_types. By default, output_types = types is
                    assumed.
      tags: The tags which should be added to the library.
      platform: Platform for which to compile, i.e. "cpu" or "gpu"
      unroll_factors: The unrolling specification, e.g. "4,4"
      extra_args: Extra arguments to pass to the generator tool.
    """
    if not output_types:
        output_types = types

    # zip() stops at the shorter list, which would silently drop kernels.
    if len(output_types) != len(types):
        fail("gen_kernel_library(%s): got %d types but %d output_types; they must correspond 1:1." % (
            name,
            len(types),
            len(output_types),
        ))

    tool = "//tensorflow/compiler/mlir/tools/kernel_gen:tf_to_kernel"

    # Common name stem of all targets and files belonging to one kernel variant.
    stems = [
        "{name}_{platform}_{type}_{output_type}".format(
            name = name,
            platform = platform,
            type = elem_type,
            output_type = output_type,
        )
        for (elem_type, output_type) in zip(types, output_types)
    ]

    if cuda_gpu_architectures() or rocm_gpu_architectures():
        gpu_archs = rocm_gpu_architectures() + cuda_gpu_architectures()
        for (elem_type, output_type, stem) in zip(types, output_types, stems):
            mlir_file = stem + ".mlir"
            _gen_mlir_op(
                name = name,
                platform = platform,
                type = elem_type,
                output_type = output_type,
            )
            _gen_kernel_fatbin_rule(
                name = stem + "_kernel_generator",
                mlir_op = mlir_file,
                data_type = elem_type,
                gpu_archs = gpu_archs,
                tile_size = tile_size,
                unroll_factors = unroll_factors,
                extra_args = extra_args,
            )

            # We have to use a sh_test instead of build_test because it doesn't properly find the dependent targets.
            native.sh_test(
                name = stem + "_gen_test",
                srcs = ["build_test.sh"],
                tags = ["no_rocm"],
                args = [
                    "$(location %s)" % tool,
                    "$(location %s)" % mlir_file,
                ],
                size = "medium",
                data = [
                    ":" + mlir_file,
                    tool,
                ],
            )

    native.cc_library(
        name = name + "_kernels",
        compatible_with = get_compatible_with_cloud(),
        deps = if_gpu_is_configured([":" + stem + "_kernel_generator" for stem in stems]),
        linkstatic = 1,
        tags = tags,
    )