blob: f75413fdaed952dd141c38a92ad210afd684b28b [file] [log] [blame]
//===- mlir-cuda-runner.cpp - MLIR CUDA Execution Driver-------------------===//
//
// Copyright 2019 The MLIR Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
//
// This is a command line utility that executes an MLIR file on the GPU by
// translating MLIR to NVVM/LLVM IR before JIT-compiling and executing the
// latter.
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/STLExtras.h"
#include "mlir/Conversion/GPUToCUDA/GPUToCUDAPass.h"
#include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h"
#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h"
#include "mlir/Dialect/GPU/GPUDialect.h"
#include "mlir/Dialect/GPU/Passes.h"
#include "mlir/IR/Function.h"
#include "mlir/IR/Module.h"
#include "mlir/LLVMIR/LLVMDialect.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Pass/PassManager.h"
#include "mlir/Support/JitRunner.h"
#include "mlir/Transforms/DialectConversion.h"
#include "cuda.h"
using namespace mlir;
/// Reports a failed CUDA driver call as an error diagnostic on `function`.
///
/// The diagnostic text is `message`, then " failed with error code ", then
/// `error` rendered through llvm::Twine, then the contents of `buffer` wrapped
/// in square brackets.
inline void emit_cuda_error(const llvm::Twine &message, const char *buffer,
                            CUresult error, FuncOp &function) {
  // The text is assembled inside one expression so that every temporary
  // involved stays alive until emitError has returned.
  function.emitError(message + " failed with error code " +
                     llvm::Twine{error} + "[" + buffer + "]");
}
// Evaluates the CUDA driver call `expr` exactly once. If the result differs
// from CUDA_SUCCESS, the failure is reported through emit_cuda_error under the
// label `msg` and the enclosing function returns a value-initialized result
// (`return {}`).
//
// The expansion site must have `jitErrorBuffer` and `function` in scope.
//
// The body is wrapped in do { ... } while (0) so that the macro followed by a
// semicolon forms exactly one statement and can therefore be used as the
// unbraced branch of an if/else.
#define RETURN_ON_CUDA_ERROR(expr, msg)                                        \
  do {                                                                         \
    auto _cuda_error = (expr);                                                 \
    if (_cuda_error != CUDA_SUCCESS) {                                         \
      emit_cuda_error(msg, jitErrorBuffer, _cuda_error, function);             \
      return {};                                                               \
    }                                                                          \
  } while (0)
/// Compiles the PTX text `ptx` into a cubin using the CUDA driver's linker.
///
/// `function` supplies the name attached to the linker input and is the
/// operation on which failures are reported. On success the cubin bytes are
/// returned. If any CUDA call fails, an error is emitted on `function` and an
/// empty OwnedCubin is returned.
OwnedCubin compilePtxToCubin(const std::string ptx, FuncOp &function) {
  // Handed to the linker as its error log buffer; RETURN_ON_CUDA_ERROR
  // includes its contents in every diagnostic it emits.
  char jitErrorBuffer[4096] = {0};

  RETURN_ON_CUDA_ERROR(cuInit(0), "cuInit");

  // Linking requires a device context.
  CUdevice device;
  RETURN_ON_CUDA_ERROR(cuDeviceGet(&device, 0), "cuDeviceGet");
  // NOTE(review): the context created here is never destroyed. Confirm
  // whether code that runs later relies on it staying current before adding
  // a cuCtxDestroy call.
  CUcontext context;
  RETURN_ON_CUDA_ERROR(cuCtxCreate(&context, 0, device), "cuCtxCreate");

  CUlinkState linkState;
  CUjit_option jitOptions[] = {CU_JIT_ERROR_LOG_BUFFER,
                               CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES};
  void *jitOptionsVals[] = {jitErrorBuffer,
                            reinterpret_cast<void *>(sizeof(jitErrorBuffer))};
  RETURN_ON_CUDA_ERROR(cuLinkCreate(2,              /* number of jit options */
                                    jitOptions,     /* jit options */
                                    jitOptionsVals, /* jit option values */
                                    &linkState),
                       "cuLinkCreate");

  // cuLinkAddData receives the name as a C string, but data() on the function
  // name is not guaranteed to point at a NUL-terminated buffer. Copy it into
  // a std::string so that c_str() is safe to pass.
  const std::string kernelName = function.getName().str();

  // The fallible link steps run inside a helper so that the link state
  // created above is destroyed on every path, including the failing ones.
  auto linkToCubin = [&]() -> OwnedCubin {
    RETURN_ON_CUDA_ERROR(
        cuLinkAddData(
            linkState, CUjitInputType::CU_JIT_INPUT_PTX,
            const_cast<void *>(static_cast<const void *>(ptx.c_str())),
            ptx.length(), kernelName.c_str(), /* kernel name */
            0,                                /* number of jit options */
            nullptr,                          /* jit options */
            nullptr                           /* jit option values */
            ),
        "cuLinkAddData");

    void *cubinData = nullptr;
    size_t cubinSize = 0;
    RETURN_ON_CUDA_ERROR(cuLinkComplete(linkState, &cubinData, &cubinSize),
                         "cuLinkComplete");

    // Copy the cubin out of linker-owned memory before the link state goes
    // away.
    char *cubinAsChar = static_cast<char *>(cubinData);
    return llvm::make_unique<std::vector<char>>(cubinAsChar,
                                                cubinAsChar + cubinSize);
  };
  OwnedCubin result = linkToCubin();

  // This will also destroy the cubin data.
  const CUresult destroyStatus = cuLinkDestroy(linkState);
  // A link failure has already been reported by the helper; do not emit a
  // second diagnostic on top of it.
  if (!result)
    return {};
  RETURN_ON_CUDA_ERROR(destroyStatus, "cuLinkDestroy");
  return result;
}
namespace {
/// Conversion pattern registered for gpu::LaunchFuncOp.
///
/// The launch operation itself is kept: it is cloned, the clone is given the
/// already-converted operands, and the original operation is replaced without
/// any replacement values.
struct GPULaunchFuncOpLowering : public LLVMOpLowering {
  explicit GPULaunchFuncOpLowering(LLVMTypeConverter &lowering_)
      : LLVMOpLowering(gpu::LaunchFuncOp::getOperationName(),
                       lowering_.getDialect()->getContext(), lowering_) {}

  /// Converts the kernel arguments to LLVM types and preserves the rest of
  /// the operation.
  PatternMatchResult
  matchAndRewrite(Operation *op, ArrayRef<Value *> operands,
                  ConversionPatternRewriter &rewriter) const override {
    // Duplicate the op and rewire the copy to the converted operands.
    auto *launchClone = rewriter.clone(*op);
    launchClone->setOperands(operands);

    // Drop the original op; no values are supplied in its place.
    rewriter.replaceOp(op, llvm::None);
    return matchSuccess();
  }
};
} // end anonymous namespace
/// Runs the GPU lowering pipeline on module `m` and then verifies the module.
///
/// Returns failure() if either the pass pipeline or the final verification
/// fails, and success() otherwise.
static LogicalResult runMLIRPasses(ModuleOp m) {
  // The IR is inconsistent between the individual steps of this gradual
  // lowering, so verification in between passes is switched off.
  PassManager pm(/*verifyPasses=*/false);

  // Supplies the patterns for the LLVM conversion: the standard-to-LLVM
  // patterns plus the launch-op pattern defined in this file.
  auto fillLLVMPatterns = [](LLVMTypeConverter &converter,
                             OwningRewritePatternList &patterns) {
    populateStdToLLVMConversionPatterns(converter, patterns);
    patterns.insert<GPULaunchFuncOpLowering>(converter);
  };

  pm.addPass(createGpuKernelOutliningPass());
  pm.addPass(createConvertToLLVMIRPass(fillLLVMPatterns));
  pm.addPass(createLowerGpuOpsToNVVMOpsPass());
  pm.addPass(createConvertGPUKernelToCubinPass(&compilePtxToCubin));
  pm.addPass(createGenerateCubinAccessorPass());
  pm.addPass(createConvertGpuLaunchFuncToCudaCallsPass());

  // Verification only runs when the pipeline itself succeeded.
  if (failed(pm.run(m)) || failed(m.verify()))
    return failure();
  return success();
}
/// Program entry point: forwards the command line to mlir::JitRunnerMain
/// together with runMLIRPasses and returns its result as the exit code.
int main(int argc, char **argv) {
  const int exitCode = mlir::JitRunnerMain(argc, argv, &runMLIRPasses);
  return exitCode;
}