Lower scalar parts of CFG functions to LLVM IR

Initial restricted implementaiton of the MLIR to LLVM IR translation.
Introduce a new flow into the mlir-translate tool taking an MLIR module
containing CFG functions only and producing and LLVM IR module.  The MLIR
features supported by the translator are as follows:
- primitive and function types;
- integer constants;
- cfg and ext functions with 0 or 1 return values;
- calls to these functions;
- basic block conversion translation of arguments to phi nodes;
- conversion between arguments of the first basic block and function arguments;
- (conditional) branches;
- integer addition and comparison operations.

Are NOT supported:
- vector and tensor types and operations on them;
- memrefs and operations on them;
- allocations;
- functions returning multiple values;
- LLVM Module triple and data layout (index type is hardcoded to i64).

Create a new MLIR library and place it under lib/Target/LLVMIR.  The "Target"
library group is similar to the one present in LLVM and is intended to contain
all future public MLIR translation targets.

The general flow of MLIR to LLVM IR convresion will include several lowering
and simplification passes on the MLIR itself in order to make the translation
as simple as possible.  In particular, ML functions should be transformed to
CFG functions by the recently introduced pass, operations on structured types
will be converted to sequences of operations on primitive types, complex
operations such as affine_apply will be converted into sequence of primitive
operations, primitive operations themselves may eventually be converted to an
LLVM dialect that uses LLVM-like operations.

Introduce the first translation test so that further changes make sure the
basic translation functionality is not broken.

PiperOrigin-RevId: 222400112
diff --git a/include/mlir/Target/LLVMIR.h b/include/mlir/Target/LLVMIR.h
new file mode 100644
index 0000000..353abe8
--- /dev/null
+++ b/include/mlir/Target/LLVMIR.h
@@ -0,0 +1,48 @@
+//===- LLVMIR.h - MLIR to LLVM IR conversion --------------------*- C++ -*-===//
+//
+// Copyright 2019 The MLIR Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// =============================================================================
+//
+// This file declares the entry point for the MLIR to LLVM IR conversion.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_TARGET_LLVMIR_H
+#define MLIR_TARGET_LLVMIR_H
+
+#include <memory>
+
+// Forward-declare LLVM classses.
+namespace llvm {
+class LLVMContext;
+class Module;
+} // namespace llvm
+
+namespace mlir {
+
+class Module;
+
+/// Convert the given MLIR module into LLVM IR.  Create an LLVM IR module in
+/// "llvmContext" and return a unique pointer to it.  The MLIR module is not
+/// allowed to contain MLFunctions, lower them to CFGFunctions beforehand using
+/// an appropriate pass.  In case of error, report it to the error handler
+/// registered with the MLIR context, if any (obtained from the MLIR module),
+/// and return `nullptr`.
+std::unique_ptr<llvm::Module>
+convertModuleToLLVMIR(Module &module, llvm::LLVMContext &llvmContext);
+
+} // namespace mlir
+
+#endif // MLIR_TARGET_LLVMIR_H
diff --git a/lib/Target/LLVMIR/ConvertToLLVMIR.cpp b/lib/Target/LLVMIR/ConvertToLLVMIR.cpp
new file mode 100644
index 0000000..c0e30f0
--- /dev/null
+++ b/lib/Target/LLVMIR/ConvertToLLVMIR.cpp
@@ -0,0 +1,422 @@
+//===- ConvertToLLVMIR.cpp - MLIR to LLVM IR conversion ---------*- C++ -*-===//
+//
+// Copyright 2019 The MLIR Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// =============================================================================
+//
+// This file implements a pass that converts CFG function to LLVM IR.  No ML
+// functions must be presented in MLIR.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/IR/BuiltinOps.h"
+#include "mlir/IR/CFGFunction.h"
+#include "mlir/IR/MLIRContext.h"
+#include "mlir/IR/Module.h"
+#include "mlir/StandardOps/StandardOps.h"
+#include "mlir/Support/FileUtilities.h"
+#include "mlir/Support/Functional.h"
+#include "mlir/Target/LLVMIR.h"
+#include "mlir/Translation.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+
+using namespace mlir;
+
+namespace {
+class ModuleLowerer {
+public:
+  explicit ModuleLowerer(llvm::LLVMContext &llvmContext)
+      : llvmContext(llvmContext), builder(llvmContext) {}
+
+  bool runOnModule(Module &m, llvm::Module &llvmModule);
+
+private:
+  bool convertBasicBlock(const BasicBlock &bb, bool ignoreArguments = false);
+  bool convertCFGFunction(const CFGFunction &cfgFunc, llvm::Function &llvmFunc);
+  bool convertFunctions(const Module &mlirModule, llvm::Module &llvmModule);
+  bool convertInstruction(const Instruction &inst);
+
+  void connectPHINodes(const CFGFunction &cfgFunc);
+
+  /// Type conversion functions.  If any conversion fails, report errors to the
+  /// context of the MLIR type and return nullptr.
+  /// \{
+  llvm::FunctionType *convertFunctionType(FunctionType type);
+  llvm::IntegerType *convertIndexType(IndexType type);
+  llvm::IntegerType *convertIntegerType(IntegerType type);
+  llvm::Type *convertFloatType(FloatType type);
+  llvm::Type *convertType(Type type);
+  /// \}
+
+  llvm::DenseMap<const Function *, llvm::Function *> functionMapping;
+  llvm::DenseMap<const SSAValue *, llvm::Value *> valueMapping;
+  llvm::DenseMap<const BasicBlock *, llvm::BasicBlock *> blockMapping;
+  llvm::LLVMContext &llvmContext;
+  llvm::IRBuilder<llvm::ConstantFolder, llvm::IRBuilderDefaultInserter> builder;
+  llvm::IntegerType *indexType;
+};
+
+llvm::IntegerType *ModuleLowerer::convertIndexType(IndexType type) {
+  return indexType;
+}
+
+llvm::IntegerType *ModuleLowerer::convertIntegerType(IntegerType type) {
+  return builder.getIntNTy(type.getBitWidth());
+}
+
+llvm::Type *ModuleLowerer::convertFloatType(FloatType type) {
+  MLIRContext *context = type.getContext();
+  switch (type.getKind()) {
+  case Type::Kind::F32:
+    return builder.getFloatTy();
+  case Type::Kind::F64:
+    return builder.getDoubleTy();
+  case Type::Kind::F16:
+    return builder.getHalfTy();
+  case Type::Kind::BF16:
+    return context->emitError(UnknownLoc::get(context),
+                              "Unsupported type: BF16"),
+           nullptr;
+  default:
+    llvm_unreachable("non-float type in convertFloatType");
+  }
+}
+
+llvm::FunctionType *ModuleLowerer::convertFunctionType(FunctionType type) {
+  // TODO(zinenko): convert tuple to LLVM structure types
+  assert(type.getNumResults() <= 1 && "NYI: tuple returns");
+  auto resultType = type.getNumResults() == 0
+                        ? llvm::Type::getVoidTy(llvmContext)
+                        : convertType(type.getResult(0));
+  if (!resultType)
+    return nullptr;
+
+  auto argTypes =
+      functional::map([this](Type inputType) { return convertType(inputType); },
+                      type.getInputs());
+  if (std::any_of(argTypes.begin(), argTypes.end(),
+                  [](const llvm::Type *t) { return t == nullptr; }))
+    return nullptr;
+
+  return llvm::FunctionType::get(resultType, argTypes, /*isVarArg=*/false);
+}
+
+llvm::Type *ModuleLowerer::convertType(Type type) {
+  if (auto funcType = type.dyn_cast<FunctionType>())
+    return convertFunctionType(funcType);
+  if (auto intType = type.dyn_cast<IntegerType>())
+    return convertIntegerType(intType);
+  if (auto floatType = type.dyn_cast<FloatType>())
+    return convertFloatType(floatType);
+  if (auto indexType = type.dyn_cast<IndexType>())
+    return convertIndexType(indexType);
+
+  MLIRContext *context = type.getContext();
+  std::string message;
+  llvm::raw_string_ostream os(message);
+  os << "unsupported type: ";
+  type.print(os);
+  context->emitError(UnknownLoc::get(context), os.str());
+  return nullptr;
+}
+
+static llvm::CmpInst::Predicate getLLVMCmpPredicate(CmpIPredicate p) {
+  switch (p) {
+  case CmpIPredicate::EQ:
+    return llvm::CmpInst::Predicate::ICMP_EQ;
+  case CmpIPredicate::NE:
+    return llvm::CmpInst::Predicate::ICMP_NE;
+  case CmpIPredicate::SLT:
+    return llvm::CmpInst::Predicate::ICMP_SLT;
+  case CmpIPredicate::SLE:
+    return llvm::CmpInst::Predicate::ICMP_SLE;
+  case CmpIPredicate::SGT:
+    return llvm::CmpInst::Predicate::ICMP_SGT;
+  case CmpIPredicate::SGE:
+    return llvm::CmpInst::Predicate::ICMP_SGE;
+  case CmpIPredicate::ULT:
+    return llvm::CmpInst::Predicate::ICMP_ULT;
+  case CmpIPredicate::ULE:
+    return llvm::CmpInst::Predicate::ICMP_ULE;
+  case CmpIPredicate::UGT:
+    return llvm::CmpInst::Predicate::ICMP_UGT;
+  case CmpIPredicate::UGE:
+    return llvm::CmpInst::Predicate::ICMP_UGE;
+  default:
+    llvm_unreachable("incorrect comparison predicate");
+  }
+}
+
+// Convert specific operation instruction types LLVM instructions.
+// FIXME(zinenko): this should eventually become a separate MLIR pass that
+// converts MLIR standard operations into LLVM IR dialect; the translation in
+// that case would become a simple 1:1 instruction and value remapping.
+bool ModuleLowerer::convertInstruction(const Instruction &inst) {
+  if (auto op = inst.dyn_cast<AddIOp>())
+    return valueMapping[op->getResult()] =
+               builder.CreateAdd(valueMapping[op->getOperand(0)],
+                                 valueMapping[op->getOperand(1)]),
+           false;
+  if (auto op = inst.dyn_cast<MulIOp>())
+    return valueMapping[op->getResult()] =
+               builder.CreateMul(valueMapping[op->getOperand(0)],
+                                 valueMapping[op->getOperand(1)]),
+           false;
+  if (auto op = inst.dyn_cast<CmpIOp>())
+    return valueMapping[op->getResult()] =
+               builder.CreateICmp(getLLVMCmpPredicate(op->getPredicate()),
+                                  valueMapping[op->getOperand(0)],
+                                  valueMapping[op->getOperand(1)]),
+           false;
+  if (auto constantOp = inst.dyn_cast<ConstantOp>()) {
+    llvm::Type *type = convertType(constantOp->getType());
+    if (!type)
+      return true;
+    assert(isa<llvm::IntegerType>(type) &&
+           "only integer LLVM types are supported");
+    auto attr = (constantOp->getValue()).cast<IntegerAttr>();
+    // Create a new APInt even if we can extract one from the attribute, because
+    // attributes are currently hardcoded to be 64-bit APInts and LLVM will
+    // create an i64 constant from those.
+    valueMapping[constantOp->getResult()] = llvm::Constant::getIntegerValue(
+        type, llvm::APInt(type->getIntegerBitWidth(), attr.getInt()));
+    return false;
+  }
+  if (auto callOp = inst.dyn_cast<CallOp>()) {
+    auto operands = functional::map(
+        [this](const SSAValue *value) { return valueMapping.lookup(value); },
+        callOp->getOperands());
+    auto numResults = callOp->getNumResults();
+    // TODO(zinenko): support tuple returns
+    assert(numResults <= 1 && "NYI: tuple returns");
+
+    llvm::Value *result =
+        builder.CreateCall(functionMapping[callOp->getCallee()], operands);
+    if (numResults == 1)
+      valueMapping[callOp->getResult(0)] = result;
+    return false;
+  }
+
+  // Terminators.
+  if (auto returnInst = inst.dyn_cast<ReturnOp>()) {
+    unsigned numOperands = returnInst->getNumOperands();
+    // TODO(zinenko): support tuple returns
+    assert(numOperands <= 1u && "NYI: tuple returns");
+
+    if (numOperands == 0)
+      builder.CreateRetVoid();
+    else
+      builder.CreateRet(valueMapping[returnInst->getOperand(0)]);
+
+    return false;
+  }
+  if (auto branchInst = inst.dyn_cast<BranchOp>()) {
+    builder.CreateBr(blockMapping[branchInst->getDest()]);
+    return false;
+  }
+  if (auto condBranchInst = inst.dyn_cast<CondBranchOp>()) {
+    builder.CreateCondBr(valueMapping[condBranchInst->getCondition()],
+                         blockMapping[condBranchInst->getTrueDest()],
+                         blockMapping[condBranchInst->getFalseDest()]);
+    return false;
+  }
+  inst.emitError("unsupported operation");
+  return true;
+}
+
+bool ModuleLowerer::convertBasicBlock(const BasicBlock &bb,
+                                      bool ignoreArguments) {
+  builder.SetInsertPoint(blockMapping[&bb]);
+
+  // Before traversing instructions, make block arguments available through
+  // value remapping and PHI nodes, but do not add incoming edges for the PHI
+  // nodes just yet: those values may be defined by this or following blocks.
+  // This step is omitted if "ignoreArguments" is set.  The arguments of the
+  // first basic block have been already made available through the remapping of
+  // LLVM function arguments.
+  if (!ignoreArguments) {
+    auto predecessors = bb.getPredecessors();
+    unsigned numPredecessors =
+        std::distance(predecessors.begin(), predecessors.end());
+    for (const auto *arg : bb.getArguments()) {
+      llvm::Type *type = convertType(arg->getType());
+      if (!type)
+        return true;
+      llvm::PHINode *phi = builder.CreatePHI(type, numPredecessors);
+      valueMapping[arg] = phi;
+    }
+  }
+
+  // Traverse instructions.
+  for (const auto &inst : bb) {
+    if (convertInstruction(inst))
+      return true;
+  }
+
+  return false;
+}
+
+// Get the SSA value passed to the current block from the terminator instruction
+// of its predecessor.
+static const SSAValue *getPHISourceValue(const BasicBlock *current,
+                                         const BasicBlock *pred,
+                                         unsigned numArguments,
+                                         unsigned index) {
+  const Instruction &terminator = *pred->getTerminator();
+  if (terminator.isa<BranchOp>()) {
+    return terminator.getOperand(index);
+  }
+
+  // For conditional branches, we need to check if the current block is reached
+  // through the "true" or the "false" branch and take the relevant operands.
+  auto condBranchOp = terminator.dyn_cast<CondBranchOp>();
+  assert(condBranchOp &&
+         "only branch instructions can be terminators of a basic block that "
+         "has successors");
+
+  condBranchOp->emitError("NYI: conditional branches with arguments");
+  return nullptr;
+}
+
+void ModuleLowerer::connectPHINodes(const CFGFunction &cfgFunc) {
+  // Skip the first block, it cannot be branched to and its arguments correspond
+  // to the arguments of the LLVM function.
+  for (auto it = std::next(cfgFunc.begin()), eit = cfgFunc.end(); it != eit;
+       ++it) {
+    const BasicBlock *bb = &*it;
+    llvm::BasicBlock *llvmBB = blockMapping[bb];
+    auto phis = llvmBB->phis();
+    auto numArguments = bb->getNumArguments();
+    assert(numArguments == std::distance(phis.begin(), phis.end()));
+    for (auto &numberedPhiNode : llvm::enumerate(phis)) {
+      auto &phiNode = numberedPhiNode.value();
+      unsigned index = numberedPhiNode.index();
+      for (const auto *pred : bb->getPredecessors()) {
+        phiNode.addIncoming(
+            valueMapping[getPHISourceValue(bb, pred, numArguments, index)],
+            blockMapping[pred]);
+      }
+    }
+  }
+}
+
+bool ModuleLowerer::convertCFGFunction(const CFGFunction &cfgFunc,
+                                       llvm::Function &llvmFunc) {
+  // Clear the block mapping.  Blocks belong to a function, no need to keep
+  // blocks from the previous functions around.  Furthermore, we use this
+  // mapping to connect PHI nodes inside the function later.
+  blockMapping.clear();
+  // First, create all blocks so we can jump to them.
+  for (const auto &bb : cfgFunc) {
+    auto *llvmBB = llvm::BasicBlock::Create(llvmContext);
+    llvmBB->insertInto(&llvmFunc);
+    blockMapping[&bb] = llvmBB;
+  }
+
+  // Then, convert blocks one by one.
+  for (auto indexedBB : llvm::enumerate(cfgFunc)) {
+    const auto &bb = indexedBB.value();
+    if (convertBasicBlock(bb, /*ignoreArguments=*/indexedBB.index() == 0))
+      return true;
+  }
+
+  // Finally, after all blocks have been traversed and values mapped, connect
+  // the PHI nodes to the results of preceding blocks.
+  connectPHINodes(cfgFunc);
+  return false;
+}
+
+bool ModuleLowerer::convertFunctions(const Module &mlirModule,
+                                     llvm::Module &llvmModule) {
+  // Declare all functions first because there may be function calls that form a
+  // call graph with cycles.  We don't expect MLFunctions here.
+  for (const Function &function : mlirModule) {
+    const Function *functionPtr = &function;
+    if (!isa<ExtFunction>(functionPtr) && !isa<CFGFunction>(functionPtr))
+      continue;
+    llvm::Constant *llvmFuncCst = llvmModule.getOrInsertFunction(
+        function.getName(), convertFunctionType(function.getType()));
+    assert(isa<llvm::Function>(llvmFuncCst));
+    functionMapping[functionPtr] = cast<llvm::Function>(llvmFuncCst);
+  }
+
+  // Convert CFG functions.
+  for (const Function &function : mlirModule) {
+    const Function *functionPtr = &function;
+    auto cfgFunction = dyn_cast<CFGFunction>(functionPtr);
+    if (!cfgFunction)
+      continue;
+    llvm::Function *llvmFunc = functionMapping[cfgFunction];
+
+    // Add function arguments to the value remapping table.  In CFGFunction,
+    // arguments of the first block are those of the function.
+    assert(!cfgFunction->getBlocks().empty() &&
+           "expected at least one basic block in a CFGFunction");
+    const BasicBlock &firstBlock = *cfgFunction->begin();
+    for (auto arg : llvm::enumerate(llvmFunc->args())) {
+      valueMapping[firstBlock.getArgument(arg.index())] = &arg.value();
+    }
+
+    if (convertCFGFunction(*cfgFunction, *functionMapping[cfgFunction]))
+      return true;
+  }
+  return false;
+}
+
+bool ModuleLowerer::runOnModule(Module &m, llvm::Module &llvmModule) {
+  // Create index type once for the entire module, it needs module info that is
+  // not available in the convert*Type calls.
+  indexType =
+      builder.getIntNTy(llvmModule.getDataLayout().getPointerSizeInBits());
+
+  return convertFunctions(m, llvmModule);
+}
+} // namespace
+
+// Entry point for the lowering procedure.
+std::unique_ptr<llvm::Module>
+mlir::convertModuleToLLVMIR(Module &module, llvm::LLVMContext &llvmContext) {
+  auto llvmModule = llvm::make_unique<llvm::Module>("FIXME_name", llvmContext);
+  if (ModuleLowerer(llvmContext).runOnModule(module, *llvmModule))
+    return nullptr;
+  return llvmModule;
+}
+
+// MLIR to LLVM IR translation registration.
+static TranslateFromMLIRRegistration MLIRToLLVMIRTranslate(
+    "mlir-to-llvmir", [](Module *module, llvm::StringRef outputFilename) {
+      if (!module)
+        return true;
+
+      llvm::LLVMContext llvmContext;
+      auto llvmModule = convertModuleToLLVMIR(*module, llvmContext);
+      if (!llvmModule)
+        return true;
+
+      auto file = openOutputFile(outputFilename);
+      if (!file)
+        return true;
+
+      llvmModule->print(file->os(), nullptr);
+      file->keep();
+      return false;
+    });
diff --git a/test/Target/llvmir.mlir b/test/Target/llvmir.mlir
new file mode 100644
index 0000000..fb30385
--- /dev/null
+++ b/test/Target/llvmir.mlir
@@ -0,0 +1,308 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// CHECK-LABEL: define void @empty() {
+// CHECK-NEXT:    ret void
+// CHECK-NEXT:  }
+cfgfunc @empty() {
+bb0:
+  return
+}
+
+// CHECK-LABEL: declare void @body(i64)
+extfunc @body(index)
+
+
+// CHECK-LABEL: define void @simple_loop() {
+cfgfunc @simple_loop() {
+bb0:
+// CHECK: br label %[[SIMPLE_BB1:[0-9]+]]
+  br bb1
+
+// Constants are inlined in LLVM rather than a separate instruction.
+// CHECK: <label>:[[SIMPLE_BB1]]:
+// CHECK-NEXT: br label %[[SIMPLE_BB2:[0-9]+]]
+bb1:	// pred: bb0
+  %c1 = constant 1 : index
+  %c42 = constant 42 : index
+  br bb2(%c1 : index)
+
+// CHECK: <label>:[[SIMPLE_BB2]]:
+// CHECK-NEXT:   %{{[0-9]+}} = phi i64 [ %{{[0-9]+}}, %[[SIMPLE_BB3:[0-9]+]] ], [ 1, %[[SIMPLE_BB1]] ]
+// CHECK-NEXT:   %{{[0-9]+}} = icmp slt i64 %{{[0-9]+}}, 42
+// CHECK-NEXT:   br i1 %{{[0-9]+}}, label %[[SIMPLE_BB3]], label %[[SIMPLE_BB4:[0-9]+]]
+bb2(%0: index):	// 2 preds: bb1, bb3
+  %1 = cmpi "slt", %0, %c42 : index
+  cond_br %1, bb3, bb4
+
+// CHECK: ; <label>:[[SIMPLE_BB3]]:
+// CHECK-NEXT:   call void @body(i64 %{{[0-9]+}})
+// CHECK-NEXT:   %{{[0-9]+}} = add i64 %{{[0-9]+}}, 1
+// CHECK-NEXT:   br label %[[SIMPLE_BB2]]
+bb3:	// pred: bb2
+  call @body(%0) : (index) -> ()
+  %c1_0 = constant 1 : index
+  %2 = addi %0, %c1_0 : index
+  br bb2(%2 : index)
+
+// CHECK: ; <label>:[[SIMPLE_BB4]]:
+// CHECK-NEXT:    ret void
+bb4:	// pred: bb2
+  return
+}
+
+// CHECK-LABEL: define void @simple_caller() {
+// CHECK-NEXT:   call void @simple_loop()
+// CHECK-NEXT:   ret void
+// CHECK-NEXT: }
+cfgfunc @simple_caller() {
+bb0:
+  call @simple_loop() : () -> ()
+  return
+}
+
+//cfgfunc @simple_indirect_caller() {
+//bb0:
+//  %f = constant @simple_loop : () -> ()
+//  call_indirect %f() : () -> ()
+//  return
+//}
+
+// CHECK-LABEL: define void @ml_caller() {
+// CHECK-NEXT:   call void @simple_loop()
+// CHECK-NEXT:   call void @more_imperfectly_nested_loops()
+// CHECK-NEXT:   ret void
+// CHECK-NEXT: }
+cfgfunc @ml_caller() {
+bb0:
+  call @simple_loop() : () -> ()
+  call @more_imperfectly_nested_loops() : () -> ()
+  return
+}
+
+// CHECK-LABEL: declare i64 @body_args(i64)
+extfunc @body_args(index) -> index
+// CHECK-LABEL: declare i32 @other(i64, i32)
+extfunc @other(index, i32) -> i32
+
+// CHECK-LABEL: define i32 @mlfunc_args(i32, i32) {
+// CHECK-NEXT: br label %[[ARGS_BB1:[0-9]+]]
+cfgfunc @mlfunc_args(i32, i32) -> i32 {
+bb0(%arg0: i32, %arg1: i32):
+  %c0_i32 = constant 0 : i32
+  br bb1
+
+// CHECK: <label>:[[ARGS_BB1]]:
+// CHECK-NEXT: br label %[[ARGS_BB2:[0-9]+]]
+bb1:	// pred: bb0
+  %c0 = constant 0 : index
+  %c42 = constant 42 : index
+  br bb2(%c0 : index)
+
+// CHECK: <label>:[[ARGS_BB2]]:
+// CHECK-NEXT:   %5 = phi i64 [ %12, %[[ARGS_BB3:[0-9]+]] ], [ 0, %[[ARGS_BB1]] ]
+// CHECK-NEXT:   %6 = icmp slt i64 %5, 42
+// CHECK-NEXT:   br i1 %6, label %[[ARGS_BB3]], label %[[ARGS_BB4:[0-9]+]]
+bb2(%0: index):	// 2 preds: bb1, bb3
+  %1 = cmpi "slt", %0, %c42 : index
+  cond_br %1, bb3, bb4
+
+// CHECK: <label>:[[ARGS_BB3]]:
+// CHECK-NEXT:   %8 = call i64 @body_args(i64 %5)
+// CHECK-NEXT:   %9 = call i32 @other(i64 %8, i32 %0)
+// CHECK-NEXT:   %10 = call i32 @other(i64 %8, i32 %9)
+// CHECK-NEXT:   %11 = call i32 @other(i64 %8, i32 %1)
+// CHECK-NEXT:   %12 = add i64 %5, 1
+// CHECK-NEXT:   br label %[[ARGS_BB2]]
+bb3:	// pred: bb2
+  %2 = call @body_args(%0) : (index) -> index
+  %3 = call @other(%2, %arg0) : (index, i32) -> i32
+  %4 = call @other(%2, %3) : (index, i32) -> i32
+  %5 = call @other(%2, %arg1) : (index, i32) -> i32
+  %c1 = constant 1 : index
+  %6 = addi %0, %c1 : index
+  br bb2(%6 : index)
+
+// CHECK: <label>:[[ARGS_BB4]]:
+// CHECK-NEXT:   %14 = call i32 @other(i64 0, i32 0)
+// CHECK-NEXT:   ret i32 %14
+bb4:	// pred: bb2
+  %c0_0 = constant 0 : index
+  %7 = call @other(%c0_0, %c0_i32) : (index, i32) -> i32
+  return %7 : i32
+}
+
+// CHECK: declare void @pre(i64)
+extfunc @pre(index)
+
+// CHECK: declare void @body2(i64, i64)
+extfunc @body2(index, index)
+
+// CHECK: declare void @post(i64)
+extfunc @post(index)
+
+// CHECK-LABEL: define void @imperfectly_nested_loops() {
+// CHECK-NEXT:   br label %[[IMPER_BB1:[0-9]+]]
+cfgfunc @imperfectly_nested_loops() {
+bb0:
+  br bb1
+
+// CHECK: <label>:[[IMPER_BB1]]:
+// CHECK-NEXT:   br label %[[IMPER_BB2:[0-9]+]]
+bb1:	// pred: bb0
+  %c0 = constant 0 : index
+  %c42 = constant 42 : index
+  br bb2(%c0 : index)
+
+// CHECK: <label>:[[IMPER_BB2]]:
+// CHECK-NEXT:   %3 = phi i64 [ %13, %[[IMPER_BB7:[0-9]+]] ], [ 0, %[[IMPER_BB1]] ]
+// CHECK-NEXT:   %4 = icmp slt i64 %3, 42
+// CHECK-NEXT:   br i1 %4, label %[[IMPER_BB3:[0-9]+]], label %[[IMPER_BB8:[0-9]+]]
+bb2(%0: index):	// 2 preds: bb1, bb7
+  %1 = cmpi "slt", %0, %c42 : index
+  cond_br %1, bb3, bb8
+
+// CHECK: <label>:[[IMPER_BB3]]:
+// CHECK-NEXT:   call void @pre(i64 %3)
+// CHECK-NEXT:   br label %[[IMPER_BB4:[0-9]+]]
+bb3:	// pred: bb2
+  call @pre(%0) : (index) -> ()
+  br bb4
+
+// CHECK: <label>:[[IMPER_BB4]]:
+// CHECK-NEXT:   br label %[[IMPER_BB5:[0-9]+]]
+bb4:	// pred: bb3
+  %c7 = constant 7 : index
+  %c56 = constant 56 : index
+  br bb5(%c7 : index)
+
+// CHECK: <label>:[[IMPER_BB5]]:
+// CHECK-NEXT:   %8 = phi i64 [ %11, %[[IMPER_BB6:[0-9]+]] ], [ 7, %[[IMPER_BB4]] ]
+// CHECK-NEXT:   %9 = icmp slt i64 %8, 56
+// CHECK-NEXT:   br i1 %9, label %[[IMPER_BB6]], label %[[IMPER_BB7]]
+bb5(%2: index):	// 2 preds: bb4, bb6
+  %3 = cmpi "slt", %2, %c56 : index
+  cond_br %3, bb6, bb7
+
+// CHECK: <label>:[[IMPER_BB6]]:
+// CHECK-NEXT:   call void @body2(i64 %3, i64 %8)
+// CHECK-NEXT:   %11 = add i64 %8, 2
+// CHECK-NEXT:   br label %[[IMPER_BB5]]
+bb6:	// pred: bb5
+  call @body2(%0, %2) : (index, index) -> ()
+  %c2 = constant 2 : index
+  %4 = addi %2, %c2 : index
+  br bb5(%4 : index)
+
+// CHECK: <label>:[[IMPER_BB7]]:
+// CHECK-NEXT:   call void @post(i64 %3)
+// CHECK-NEXT:   %13 = add i64 %3, 1
+// CHECK-NEXT:   br label %[[IMPER_BB2]]
+bb7:	// pred: bb5
+  call @post(%0) : (index) -> ()
+  %c1 = constant 1 : index
+  %5 = addi %0, %c1 : index
+  br bb2(%5 : index)
+
+// CHECK: <label>:[[IMPER_BB8]]:
+// CHECK-NEXT:   ret void
+bb8:	// pred: bb2
+  return
+}
+
+// CHECK: declare void @mid(i64)
+extfunc @mid(index)
+
+// CHECK: declare void @body3(i64, i64)
+extfunc @body3(index, index)
+
+// A complete function transformation check.
+// CHECK-LABEL: define void @more_imperfectly_nested_loops() {
+// CHECK-NEXT:   br label %1
+// CHECK: ; <label>:1:                                      ; preds = %0
+// CHECK-NEXT:   br label %2
+// CHECK: ; <label>:2:                                      ; preds = %19, %1
+// CHECK-NEXT:   %3 = phi i64 [ %20, %19 ], [ 0, %1 ]
+// CHECK-NEXT:   %4 = icmp slt i64 %3, 42
+// CHECK-NEXT:   br i1 %4, label %5, label %21
+// CHECK: ; <label>:5:                                      ; preds = %2
+// CHECK-NEXT:   call void @pre(i64 %3)
+// CHECK-NEXT:   br label %6
+// CHECK: ; <label>:6:                                      ; preds = %5
+// CHECK-NEXT:   br label %7
+// CHECK: ; <label>:7:                                      ; preds = %10, %6
+// CHECK-NEXT:   %8 = phi i64 [ %11, %10 ], [ 7, %6 ]
+// CHECK-NEXT:   %9 = icmp slt i64 %8, 56
+// CHECK-NEXT:   br i1 %9, label %10, label %12
+// CHECK: ; <label>:10:                                     ; preds = %7
+// CHECK-NEXT:   call void @body2(i64 %3, i64 %8)
+// CHECK-NEXT:   %11 = add i64 %8, 2
+// CHECK-NEXT:   br label %7
+// CHECK: ; <label>:12:                                     ; preds = %7
+// CHECK-NEXT:   call void @mid(i64 %3)
+// CHECK-NEXT:   br label %13
+// CHECK: ; <label>:13:                                     ; preds = %12
+// CHECK-NEXT:   br label %14
+// CHECK: ; <label>:14:                                     ; preds = %17, %13
+// CHECK-NEXT:   %15 = phi i64 [ %18, %17 ], [ 18, %13 ]
+// CHECK-NEXT:   %16 = icmp slt i64 %15, 37
+// CHECK-NEXT:   br i1 %16, label %17, label %19
+// CHECK: ; <label>:17:                                     ; preds = %14
+// CHECK-NEXT:   call void @body3(i64 %3, i64 %15)
+// CHECK-NEXT:   %18 = add i64 %15, 3
+// CHECK-NEXT:   br label %14
+// CHECK: ; <label>:19:                                     ; preds = %14
+// CHECK-NEXT:   call void @post(i64 %3)
+// CHECK-NEXT:   %20 = add i64 %3, 1
+// CHECK-NEXT:   br label %2
+// CHECK: ; <label>:21:                                     ; preds = %2
+// CHECK-NEXT:   ret void
+// CHECK-NEXT: }
+cfgfunc @more_imperfectly_nested_loops() {
+bb0:
+  br bb1
+bb1:	// pred: bb0
+  %c0 = constant 0 : index
+  %c42 = constant 42 : index
+  br bb2(%c0 : index)
+bb2(%0: index):	// 2 preds: bb1, bb11
+  %1 = cmpi "slt", %0, %c42 : index
+  cond_br %1, bb3, bb12
+bb3:	// pred: bb2
+  call @pre(%0) : (index) -> ()
+  br bb4
+bb4:	// pred: bb3
+  %c7 = constant 7 : index
+  %c56 = constant 56 : index
+  br bb5(%c7 : index)
+bb5(%2: index):	// 2 preds: bb4, bb6
+  %3 = cmpi "slt", %2, %c56 : index
+  cond_br %3, bb6, bb7
+bb6:	// pred: bb5
+  call @body2(%0, %2) : (index, index) -> ()
+  %c2 = constant 2 : index
+  %4 = addi %2, %c2 : index
+  br bb5(%4 : index)
+bb7:	// pred: bb5
+  call @mid(%0) : (index) -> ()
+  br bb8
+bb8:	// pred: bb7
+  %c18 = constant 18 : index
+  %c37 = constant 37 : index
+  br bb9(%c18 : index)
+bb9(%5: index):	// 2 preds: bb8, bb10
+  %6 = cmpi "slt", %5, %c37 : index
+  cond_br %6, bb10, bb11
+bb10:	// pred: bb9
+  call @body3(%0, %5) : (index, index) -> ()
+  %c3 = constant 3 : index
+  %7 = addi %5, %c3 : index
+  br bb9(%7 : index)
+bb11:	// pred: bb9
+  call @post(%0) : (index) -> ()
+  %c1 = constant 1 : index
+  %8 = addi %0, %c1 : index
+  br bb2(%8 : index)
+bb12:	// pred: bb2
+  return
+}
+