[ST] Create directory structure for ST (structured) dialect.

This is a dialect to which `linalg.tiled_loop` will be moved and transformed to a "subset"-based version.

PiperOrigin-RevId: 422132381
Change-Id: I8b6bd2403ca6949ce85c0780550cdeeee2e65491
diff --git a/tensorflow/compiler/mlir/hlo/BUILD b/tensorflow/compiler/mlir/hlo/BUILD
index 4936528..878efae 100644
--- a/tensorflow/compiler/mlir/hlo/BUILD
+++ b/tensorflow/compiler/mlir/hlo/BUILD
@@ -2105,3 +2105,49 @@
         ":MhloOpsPyGen",
     ],
 )
+
+td_library(
+    name = "gml_st_ops_td_files",  # All TableGen (.td) sources of gml_st.
+    srcs = glob(["include/mlir-hlo/Dialect/gml_st/IR/*.td"]),
+    compatible_with = get_compatible_with_cloud(),
+    includes = ["include"],
+    deps = [
+        "@llvm-project//mlir:OpBaseTdFiles",  # .td files include OpBase.td.
+    ],
+)
+
+gentbl_cc_library(
+    name = "gml_st_ops_inc_gen",  # Runs mlir-tblgen over gml_st_ops.td.
+    compatible_with = get_compatible_with_cloud(),
+    strip_include_prefix = "include",
+    tbl_outs = [
+        (
+            ["-gen-op-decls"],  # Op class declarations.
+            "include/mlir-hlo/Dialect/gml_st/IR/gml_st_ops.h.inc",
+        ),
+        (
+            ["-gen-op-defs"],  # Op class definitions.
+            "include/mlir-hlo/Dialect/gml_st/IR/gml_st_ops.cc.inc",
+        ),
+    ],
+    tblgen = "@llvm-project//mlir:mlir-tblgen",
+    td_file = "include/mlir-hlo/Dialect/gml_st/IR/gml_st_ops.td",
+    deps = [":gml_st_ops_td_files"],
+)
+
+cc_library(
+    name = "gml_st",  # C++ library with the gml_st dialect and its ops.
+    srcs = [
+        "include/mlir-hlo/Dialect/gml_st/IR/gml_st_ops.cc.inc",
+        "include/mlir-hlo/Dialect/gml_st/IR/gml_st_ops.h.inc",
+        "lib/Dialect/gml_st/IR/gml_st_ops.cc",
+    ],
+    hdrs = [
+        "include/mlir-hlo/Dialect/gml_st/IR/gml_st_ops.h",
+    ],
+    includes = ["include"],
+    deps = [
+        ":gml_st_ops_inc_gen",  # Generates the .inc files listed in srcs.
+        "@llvm-project//mlir:IR",
+    ],
+)
diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/CMakeLists.txt b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/CMakeLists.txt
index 46bd135..a9dbc0a 100644
--- a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/CMakeLists.txt
+++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/CMakeLists.txt
@@ -14,6 +14,7 @@
 # limitations under the License.
 #
 add_subdirectory(disc-ral)
+add_subdirectory(gml_st)
 add_subdirectory(lhlo)
 add_subdirectory(lhlo_gpu)
 add_subdirectory(mhlo)
diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/gml_st/CMakeLists.txt b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/gml_st/CMakeLists.txt
new file mode 100644
index 0000000..ab306a0
--- /dev/null
+++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/gml_st/CMakeLists.txt
@@ -0,0 +1,15 @@
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+add_subdirectory(IR)  # TableGen rules for the gml_st op headers.
diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/gml_st/IR/CMakeLists.txt b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/gml_st/IR/CMakeLists.txt
new file mode 100644
index 0000000..1ad2494
--- /dev/null
+++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/gml_st/IR/CMakeLists.txt
@@ -0,0 +1,20 @@
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set(LLVM_TARGET_DEFINITIONS gml_st_ops.td)  # Input for the rules below.
+mlir_tablegen(gml_st_ops.h.inc -gen-op-decls)  # Op class declarations.
+mlir_tablegen(gml_st_ops.cc.inc -gen-op-defs)  # Op class definitions.
+
+add_public_tablegen_target(MLIRgml_st_opsIncGen)
+add_dependencies(mlir-headers MLIRgml_st_opsIncGen)  # Part of mlir-headers.
diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/gml_st/IR/gml_st_ops.h b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/gml_st/IR/gml_st_ops.h
new file mode 100644
index 0000000..bce6458
--- /dev/null
+++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/gml_st/IR/gml_st_ops.h
@@ -0,0 +1,39 @@
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// This file declares the GML ST dialect and the operations it contains.
+
+#ifndef TENSORFLOW_COMPILER_MLIR_HLO_INCLUDE_MLIR_HLO_DIALECT_GML_ST_IR_GML_ST_OPS_H_
+#define TENSORFLOW_COMPILER_MLIR_HLO_INCLUDE_MLIR_HLO_DIALECT_GML_ST_IR_GML_ST_OPS_H_
+
+#include "mlir/IR/Dialect.h"
+#include "mlir/IR/MLIRContext.h"
+
+namespace mlir {
+namespace gml_st {
+// Dialect class for gml_st; the constructor registers the generated ops.
+class GmlStDialect : public Dialect {
+ public:
+  explicit GmlStDialect(MLIRContext *context);
+  static StringRef getDialectNamespace() { return "gml_st"; }
+};
+
+}  // namespace gml_st
+}  // namespace mlir
+
+#define GET_OP_CLASSES  // Selects the op class declarations in the .inc file.
+#include "mlir-hlo/Dialect/gml_st/IR/gml_st_ops.h.inc"
+
+#endif  // TENSORFLOW_COMPILER_MLIR_HLO_INCLUDE_MLIR_HLO_DIALECT_GML_ST_IR_GML_ST_OPS_H_
diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/gml_st/IR/gml_st_ops.td b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/gml_st/IR/gml_st_ops.td
new file mode 100644
index 0000000..fb375f3
--- /dev/null
+++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/gml_st/IR/gml_st_ops.td
@@ -0,0 +1,29 @@
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// This is the operation definition file for GML ST ops.
+
+#ifndef GML_ST_OPS
+#define GML_ST_OPS
+
+include "mlir/IR/OpBase.td"
+include "mlir-hlo/Dialect/gml_st/IR/gml_st_ops_base.td"
+
+class GMLST_Op<string mnemonic, list<OpTrait> traits> :
+    Op<GMLST_Dialect, mnemonic, traits> {  // Base class for gml_st ops.
+  let verifier = [{ return Verify(*this); }];  // Each op needs a C++ Verify().
+}
+
+#endif // GML_ST_OPS
diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/gml_st/IR/gml_st_ops_base.td b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/gml_st/IR/gml_st_ops_base.td
new file mode 100644
index 0000000..0e59cee
--- /dev/null
+++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/gml_st/IR/gml_st_ops_base.td
@@ -0,0 +1,28 @@
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef GML_ST_OPS_BASE
+#define GML_ST_OPS_BASE
+
+include "mlir/IR/OpBase.td"
+
+def GMLST_Dialect : Dialect {
+  let name = "gml_st";
+  let cppNamespace = "::mlir::gml_st";
+  // Presumably emits accessors without get/set prefixes (see OpBase.td); confirm.
+  let emitAccessorPrefix = kEmitAccessorPrefix_Raw;
+}
+
+#endif // GML_ST_OPS_BASE
diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/gml_st/README.md b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/gml_st/README.md
new file mode 100644
index 0000000..28569f8
--- /dev/null
+++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/gml_st/README.md
@@ -0,0 +1,173 @@
+# Google ML Structured Dialect
+ 
+The `gml_st` dialect will contain a loop-like construct and subset operations
+that should allow support for fusion beyond rectangular tiles. This is necessary
+for operations like `gather`, `scatter`, `concat` and more.
+
+## Overview
+### Tiling and fusion
+
+Tiling of an op is performed by creating a loop that computes subsets of the
+result. Usually the tiling is needed to enable vectorization or distribution.
+
+Before tiling
+
+```
+%0 = op(%input)
+```
+
+After tiling
+
+```
+loop (%ivs)
+  %1 = subset(%input, %ivs)
+  %2 = op (%1)
+```
+
+Fusion of a producer op into a tiled consumer consists of two main parts:
+computing subsets of producer's operands and moving the producer op into the
+loop body so that it operates on the subsets of its original operands.
+
+After consumer tiling
+```
+%0 = producer (%input)
+loop (%ivs)
+  %1 = subset(%0, %ivs)
+  %2 = consumer(%1)
+```
+
+After producer fusion
+
+```
+loop (%ivs)
+  %0 = subset(%input, %ivs)
+  %1 = producer(%0)
+  %2 = consumer (%1)
+```
+
+There is some duality between tiling and fusion. One can consider tiling as
+fusion of the op into a loop that partitions the iteration space and just
+returns identity for every subset. On the other hand, fusion can be seen as
+tiling of the producer and then merging of the loop bodies.
+
+### Subset operations
+
+Linalg has support for hyperrectangular subsets (tiles) of tensor/memref
+operands. Currently, Linalg's fusion assumes that the tiling is performed only
+using `tensor.extract_slice/tensor.insert_slice` and `memref.subview`
+operations.
+There are several disadvantages to that approach:
+
+If some of the operands are not affected by tiling, i.e. the tiling was
+performed along dimensions that are not present in the operand, then we can no
+longer fuse the producer of the operand. That can happen when `linalg.generic`
+broadcasts one of the operands or when the output is tiled, but not the
+reduction dimensions.
+
+Support for fusion with ops like `gather`, `scatter`, `concat` for some of the
+cases can only be done via `TilingInterface`
+([RFC](https://llvm.discourse.group/t/rfc-for-tilinginterface-for-tiling-operations-that-dont-fit-into-linalg-structured-operation-definition/3897/7)).
+
+**Example of a tiled op**
+
+```
+%sum = linalg.tiled_loop (%i, %j) = (%c0, %c0) to (%c80, %c60) step (%c4, %c4)
+          ins (%in_ = %in: tensor<80x60xf32>, %cst_ = %cst: f32)
+          outs (%out_ = %out: tensor<80xf32>)
+          iterators["parallel", "reduction"] {
+  %in_sub = tensor.extract_slice %in_[%i, %j] [4, 4] [1, 1]
+      : tensor<80x60xf32> to tensor<4x4xf32>
+  %out_sub = tensor.extract_slice %out_[%i] [4] [1]
+      : tensor<80xf32> to tensor<4xf32>
+  %reduction = linalg.generic {
+      indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
+                       affine_map<(d0, d1) -> (d0)>],
+      iterator_types = ["parallel", "reduction"]}
+      ins(%in_sub : tensor<4x4xf32>)
+      outs(%out_sub : tensor<4xf32>) {
+    ^bb0(%a: f32, %b: f32):
+      %0 = arith.addf %a, %b : f32
+      linalg.yield %0 : f32
+  } -> tensor<4xf32>
+  %update = tensor.insert_slice %reduction into %out_[%i] [4] [1]
+      : tensor<4xf32> into tensor<80xf32>
+  linalg.yield %update : tensor<80xf32>
+}
+```
+
+The body of this loop models read-modify-write of the output tensor. The tile
+that we extract from `%out_` should have the same sizes/offsets/strides as the
+destination of `tensor.insert_slice`. The arguments of `tensor.extract_slice`
+and `tensor.insert_slice` are currently not required to encode the same tile.
+
+We introduce new operations that define subsets on tensors/memrefs:
+
+ * `subset.full %tensor` - the subset spans the original tensor fully
+ * `subset.tile %tensor [%offsets][%sizes][%strides]` - defines a rectangular
+   tile
+ * `subset.filter %tensor[%indices]` - the subset has the same shape as the
+   original tensor, but only the values at %indices are populated. This can be a
+   sparse tensor.
+ * `subset.point %tensor[%index]` - the subset contains a single element
+
+### Structured loop
+
+We introduce `gml_st.loop` that keeps the subset definition separately from the
+materialization.
+
+`linalg.generic` has `AffineMap` attributes that specify the indexing maps and a
+region that models the computation on the element types of the operand
+tensors/memrefs. The region ends with `linalg.yield` terminator that yields the
+element of the output. The load and store ops in that case are implicit, so
+are extraction/insertion in `gml_st.loop`.
+
+`gml_st.loop` has one region that contains subset operations to define the
+dense/sparse ranges that we are working with and also `gml_st.materialize` ops
+to convert subset spec to a tensor or memref.
+
+`gml_st.yield` is the terminator for `gml_st.loop` that takes computed tensors
+and a subset specification for which the computation was done. Note that this
+way we don't have to explicitly write a destructive update with
+`tensor.insert_slice` and then yield a full tensor. Here, we yield values for a
+subset.
+
+
+```
+%sum = gml_st.loop (%i, %j) = (%c0, %c0) to (%c80, %c60) step (%c4, %c4)
+           ins (%in_ = %in: tensor<80x60xf32>, %cst_ = %cst: f32)
+           outs (%out_ = %out: tensor<80xf32>)
+           iterators["parallel", "sequential"] {
+  %in_tile = gml_st.tile %in_[%i, %j] [4, 4] [1, 1]
+      : tensor<80x60xf32> to !gml_st.subset<4x4xf32>
+  %out_tile = gml_st.tile %out_[%i] [4] [1]
+      : tensor<80xf32> to !gml_st.subset<4xf32>
+
+  %in_sub = gml_st.materialize %in_tile
+      : !gml_st.subset<4x4xf32> to tensor<4x4xf32>
+  %out_sub = gml_st.materialize %out_tile
+      : !gml_st.subset<4xf32> to tensor<4xf32>
+  %reduction = linalg.generic {
+      indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
+                       affine_map<(d0, d1) -> (d0)>],
+      iterator_types = ["parallel", "reduction"]}
+      ins(%in_sub : tensor<4x4xf32>)
+      outs(%out_sub : tensor<4xf32>) {
+    ^bb0(%a: f32, %b: f32):
+      %0 = arith.addf %a, %b : f32
+      linalg.yield %0 : f32
+  } -> tensor<4xf32>
+  gml_st.yield %reduction to %out_tile
+      : tensor<4xf32> to !gml_st.subset<4xf32>
+}
+```
+
+Currently, tiling of the consumer and fusion of its producers are tightly
+coupled. If the fusion does not happen in the same pass, then some analysis is
+required to find the [consumer - `tensor.extract_slice` - producer] triple to
+perform the fusion. Keeping the subset computations separate from the
+"compute" ops not only improves readability but also simplifies fusion, since
+we have a subset computation per operand and we can just specify which argument
+of the loop we want to fuse.
+
+It also simplifies the bufferization, since we don't need to introduce the
+additional operations in MemRef dialect for every subset operation in TensorOps.
diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/CMakeLists.txt b/tensorflow/compiler/mlir/hlo/lib/Dialect/CMakeLists.txt
index 46bd135..b3b9e0d 100644
--- a/tensorflow/compiler/mlir/hlo/lib/Dialect/CMakeLists.txt
+++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/CMakeLists.txt
@@ -1,4 +1,3 @@
-#
 # Copyright 2020 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -14,6 +13,7 @@
 # limitations under the License.
 #
 add_subdirectory(disc-ral)
+add_subdirectory(gml_st)
 add_subdirectory(lhlo)
 add_subdirectory(lhlo_gpu)
 add_subdirectory(mhlo)
diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/gml_st/CMakeLists.txt b/tensorflow/compiler/mlir/hlo/lib/Dialect/gml_st/CMakeLists.txt
new file mode 100644
index 0000000..ab306a0
--- /dev/null
+++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/gml_st/CMakeLists.txt
@@ -0,0 +1,15 @@
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+add_subdirectory(IR)  # Defines the GmlStDialect library.
diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/gml_st/IR/CMakeLists.txt b/tensorflow/compiler/mlir/hlo/lib/Dialect/gml_st/IR/CMakeLists.txt
new file mode 100644
index 0000000..ef7dfae
--- /dev/null
+++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/gml_st/IR/CMakeLists.txt
@@ -0,0 +1,27 @@
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+include_directories(BEFORE  # BEFORE: prepend these to the include path.
+    ${CMAKE_CURRENT_BINARY_DIR}
+    ${CMAKE_CURRENT_SOURCE_DIR})
+
+add_mlir_dialect_library(GmlStDialect
+  gml_st_ops.cc
+  # Build only after the TableGen target has produced the op .inc files.
+  DEPENDS
+  MLIRgml_st_opsIncGen
+
+  LINK_LIBS PUBLIC
+  MLIRIR
+)
diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/gml_st/IR/gml_st_ops.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/gml_st/IR/gml_st_ops.cc
new file mode 100644
index 0000000..721ad5f
--- /dev/null
+++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/gml_st/IR/gml_st_ops.cc
@@ -0,0 +1,35 @@
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// This file defines the GML ST dialect and its operations.
+
+#include "mlir-hlo/Dialect/gml_st/IR/gml_st_ops.h"
+
+namespace mlir {
+namespace gml_st {
+// Registers the ops listed in the TableGen-generated gml_st_ops.cc.inc.
+GmlStDialect::GmlStDialect(MLIRContext* context)
+    : Dialect(getDialectNamespace(), context, TypeID::get<GmlStDialect>()) {
+  addOperations<
+#define GET_OP_LIST
+#include "mlir-hlo/Dialect/gml_st/IR/gml_st_ops.cc.inc"
+      >();
+}
+
+}  // namespace gml_st
+}  // namespace mlir
+
+#define GET_OP_CLASSES  // Pulls in the generated op class definitions.
+#include "mlir-hlo/Dialect/gml_st/IR/gml_st_ops.cc.inc"