diff --git a/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrs.td b/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrs.td index 6659f4a2c58e8..812b72681343b 100644 --- a/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrs.td +++ b/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrs.td @@ -107,4 +107,39 @@ def GPU_SelectObjectAttr : GPU_Attr<"SelectObject", "select_object", [ let genVerifyDecl = 1; } +//===----------------------------------------------------------------------===// +// GPU LLVM offload attribute. +//===----------------------------------------------------------------------===// +def GPU_OffloadOpenMP : I32EnumAttrCase<"OpenMP", 1, "omp">; +def GPU_OffloadCUDA : I32EnumAttrCase<"CUDA", 2, "cuda">; +def GPU_OffloadHIP : I32EnumAttrCase<"HIP", 3, "hip">; +def GPU_OffloadKindEnum : GPU_I32Enum< + "OffloadKind", "GPU offload kind", [ + GPU_OffloadOpenMP, + GPU_OffloadCUDA, + GPU_OffloadHIP + ]>; + +def GPU_OffloadEmbeddingAttr : GPU_Attr<"OffloadEmbedding", "offload_embedding", [ + OffloadingTranslationAttrTrait + ]> { + let description = [{ + This GPU offloading handler uses LLVM offloading infrastructure to embed GPU + binaries in the IR. At program start, the LLVM offloading mechanism registers + kernels and variables with the runtime library: CUDA RT, HIP RT or + LibOMPTarget. + The offloading mechanism relies on the runtime library to dispatch the + correct kernel based on the registered symbols. + This offload mechanism requires to specify which runtime is being called, + this is done by the `kind` parameter. + Example: + ```mlir + gpu.binary @binary <#gpu.offload_embedding> [...] + gpu.binary @binary <#gpu.offload_embedding> [...] + ``` + }]; + let parameters = (ins "gpu::OffloadKind":$kind); + let assemblyFormat = [{ `<` $kind `>` }]; +} + #endif // GPU_COMPILATION_ATTRS diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h b/mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h index 08019e77ae6af..1a55d08be9edc 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h @@ -19,6 +19,7 @@ #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/IR/Dialect.h" #include "mlir/IR/OpDefinition.h" +#include "mlir/Interfaces/DataLayoutInterfaces.h" #include "mlir/Interfaces/SideEffectInterfaces.h" #include "llvm/IR/IntrinsicsNVPTX.h" diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td index 7140e614412f9..c19820aafe168 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -17,6 +17,7 @@ include "mlir/IR/EnumAttr.td" include "mlir/Dialect/GPU/IR/CompilationAttrInterfaces.td" include "mlir/Dialect/LLVMIR/LLVMOpBase.td" include "mlir/Interfaces/SideEffectInterfaces.td" +include "mlir/Interfaces/DataLayoutInterfaces.td" include "mlir/Dialect/LLVMIR/BasicPtxBuilderInterface.td" def LLVM_PointerGlobal : LLVM_PointerInAddressSpace<1>; @@ -1946,7 +1947,9 @@ def NVVM_WgmmaMmaAsyncOp : NVVM_Op<"wgmma.mma_async", // NVVM target attribute. //===----------------------------------------------------------------------===// -def NVVM_TargettAttr : NVVM_Attr<"NVVMTarget", "target"> { +def NVVM_TargettAttr : NVVM_Attr<"NVVMTarget", "target", [ + DeclareAttrInterfaceMethods + ]> { let description = [{ GPU target attribute for controlling compilation of NVIDIA targets. All parameters decay into default values if not present. diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLDialect.h b/mlir/include/mlir/Dialect/LLVMIR/ROCDLDialect.h index c2a82ffc1c43c..fa1131a463e1a 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLDialect.h +++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLDialect.h @@ -26,6 +26,7 @@ #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/IR/Dialect.h" #include "mlir/IR/OpDefinition.h" +#include "mlir/Interfaces/DataLayoutInterfaces.h" #include "mlir/Interfaces/SideEffectInterfaces.h" ///// Ops ///// diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td index 48b830ae34f29..a492709c29954 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td @@ -15,6 +15,7 @@ include "mlir/Dialect/GPU/IR/CompilationAttrInterfaces.td" include "mlir/Dialect/LLVMIR/LLVMOpBase.td" +include "mlir/Interfaces/DataLayoutInterfaces.td" include "mlir/Interfaces/SideEffectInterfaces.td" //===----------------------------------------------------------------------===// @@ -608,8 +609,9 @@ def ROCDL_CvtSrFp8F32Op : // ROCDL target attribute. //===----------------------------------------------------------------------===// -def ROCDL_TargettAttr : - ROCDL_Attr<"ROCDLTarget", "target"> { +def ROCDL_TargettAttr : ROCDL_Attr<"ROCDLTarget", "target", [ + DeclareAttrInterfaceMethods + ]> { let description = [{ ROCDL target attribute for controlling compilation of AMDGPU targets. All parameters decay into default values if not present. diff --git a/mlir/include/mlir/Interfaces/DataLayoutInterfaces.td b/mlir/include/mlir/Interfaces/DataLayoutInterfaces.td index a8def967fffcf..eac9521aadc11 100644 --- a/mlir/include/mlir/Interfaces/DataLayoutInterfaces.td +++ b/mlir/include/mlir/Interfaces/DataLayoutInterfaces.td @@ -188,6 +188,39 @@ def DataLayoutSpecInterface : AttrInterface<"DataLayoutSpecInterface"> { }]; } +def TargetInfoAttrInterface : AttrInterface<"TargetInfoAttrInterface"> { + let cppNamespace = "::mlir"; + + let description = [{ + Attribute interface describing target information. + + Target information attributes provide essential information on the + compilation target. This information includes the target triple identifier, + the target chip identifier, and a string representation of the target features. + }]; + + let methods = [ + InterfaceMethod< + /*description=*/"Returns the target triple identifier.", + /*retTy=*/"::mlir::StringRef", + /*methodName=*/"getTargetTriple", + /*args=*/(ins) + >, + InterfaceMethod< + /*description=*/"Returns the target chip identifier.", + /*retTy=*/"::mlir::StringRef", + /*methodName=*/"getTargetChip", + /*args=*/(ins) + >, + InterfaceMethod< + /*description=*/"Returns the target features as a string.", + /*retTy=*/"std::string", + /*methodName=*/"getTargetFeatures", + /*args=*/(ins) + > + ]; +} + //===----------------------------------------------------------------------===// // Operation interface //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Target/LLVM/Offload.h b/mlir/include/mlir/Target/LLVM/Offload.h new file mode 100644 index 0000000000000..7b705667d477d --- /dev/null +++ b/mlir/include/mlir/Target/LLVM/Offload.h @@ -0,0 +1,61 @@ +//===- Offload.h - LLVM Target Offload --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares LLVM target offload utility classes. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_TARGET_LLVM_OFFLOAD_H +#define MLIR_TARGET_LLVM_OFFLOAD_H + +#include "mlir/Support/LogicalResult.h" +#include "llvm/ADT/StringRef.h" + +namespace llvm { +class Constant; +class GlobalVariable; +class Module; +} // namespace llvm + +namespace mlir { +namespace LLVM { +/// `OffloadHandler` is a utility class for creating LLVM offload entries. LLVM +/// offload entries hold information on offload symbols; for example, for a GPU +/// kernel, this includes its host address to identify the kernel and the kernel +/// identifier in the binary. Arrays of offload entries can be used to register +/// functions within the CUDA/HIP runtime. Libomptarget also uses these entries +/// to register OMP target offload kernels and variables. +class OffloadHandler { +public: + using OffloadEntryArray = + std::pair; + OffloadHandler(llvm::Module &module) : module(module) {} + + /// Returns the begin symbol name used in the entry array. + static std::string getBeginSymbol(StringRef suffix); + + /// Returns the end symbol name used in the entry array. + static std::string getEndSymbol(StringRef suffix); + + /// Returns the entry array if it exists or a pair of null pointers. + OffloadEntryArray getEntryArray(StringRef suffix); + + /// Emits an empty array of offloading entries. + OffloadEntryArray emitEmptyEntryArray(StringRef suffix); + + /// Inserts an offloading entry into an existing entry array. This method + /// returns failure if the entry array hasn't been declared. + LogicalResult insertOffloadEntry(StringRef suffix, llvm::Constant *entry); + +protected: + llvm::Module &module; +}; +} // namespace LLVM +} // namespace mlir + +#endif // MLIR_TARGET_LLVM_OFFLOAD_H diff --git a/mlir/lib/Dialect/LLVMIR/CMakeLists.txt b/mlir/lib/Dialect/LLVMIR/CMakeLists.txt index b00259677697a..00b78e30ee8b0 100644 --- a/mlir/lib/Dialect/LLVMIR/CMakeLists.txt +++ b/mlir/lib/Dialect/LLVMIR/CMakeLists.txt @@ -61,6 +61,7 @@ add_mlir_dialect_library(MLIRNVVMDialect LINK_LIBS PUBLIC MLIRIR MLIRLLVMDialect + MLIRDataLayoutInterfaces MLIRSideEffectInterfaces ) @@ -83,5 +84,6 @@ add_mlir_dialect_library(MLIRROCDLDialect LINK_LIBS PUBLIC MLIRIR MLIRLLVMDialect + MLIRDataLayoutInterfaces MLIRSideEffectInterfaces ) diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp index aa49c4dc31fbc..b73504ac4969a 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp @@ -1106,6 +1106,14 @@ NVVMTargetAttr::verify(function_ref emitError, return success(); } +StringRef NVVMTargetAttr::getTargetTriple() const { return getTriple(); } + +StringRef NVVMTargetAttr::getTargetChip() const { return getChip(); } + +std::string NVVMTargetAttr::getTargetFeatures() const { + return getFeatures().str(); +} + #define GET_OP_CLASSES #include "mlir/Dialect/LLVMIR/NVVMOps.cpp.inc" diff --git a/mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp index 26e46b31ddc01..8b10c48718a3f 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp @@ -295,6 +295,14 @@ ROCDLTargetAttr::verify(function_ref emitError, return success(); } +StringRef ROCDLTargetAttr::getTargetTriple() const { return getTriple(); } + +StringRef ROCDLTargetAttr::getTargetChip() const { return getChip(); } + +std::string ROCDLTargetAttr::getTargetFeatures() const { + return getFeatures().str(); +} + #define GET_OP_CLASSES #include "mlir/Dialect/LLVMIR/ROCDLOps.cpp.inc" diff --git a/mlir/lib/Target/LLVM/CMakeLists.txt b/mlir/lib/Target/LLVM/CMakeLists.txt index cc2c3a00a02ea..241a6c64dd868 100644 --- a/mlir/lib/Target/LLVM/CMakeLists.txt +++ b/mlir/lib/Target/LLVM/CMakeLists.txt @@ -1,5 +1,6 @@ add_mlir_library(MLIRTargetLLVM ModuleToObject.cpp + Offload.cpp ADDITIONAL_HEADER_DIRS ${MLIR_MAIN_INCLUDE_DIR}/mlir/Target/LLVM @@ -16,6 +17,7 @@ add_mlir_library(MLIRTargetLLVM Passes Support Target + FrontendOffloading LINK_LIBS PUBLIC MLIRExecutionEngineUtils MLIRTargetLLVMIRExport diff --git a/mlir/lib/Target/LLVM/Offload.cpp b/mlir/lib/Target/LLVM/Offload.cpp new file mode 100644 index 0000000000000..81ba12403bfb9 --- /dev/null +++ b/mlir/lib/Target/LLVM/Offload.cpp @@ -0,0 +1,111 @@ +//===- Offload.cpp - LLVM Target Offload ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines LLVM target offload utility classes. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Target/LLVM/Offload.h" +#include "llvm/Frontend/Offloading/Utility.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Module.h" + +using namespace mlir; +using namespace mlir::LLVM; + +std::string OffloadHandler::getBeginSymbol(StringRef suffix) { + return ("__begin_offload_" + suffix).str(); +} + +std::string OffloadHandler::getEndSymbol(StringRef suffix) { + return ("__end_offload_" + suffix).str(); +} + +namespace { +/// Returns the type of the entry array. +llvm::ArrayType *getEntryArrayType(llvm::Module &module, size_t numElems) { + return llvm::ArrayType::get(llvm::offloading::getEntryTy(module), numElems); +} + +/// Creates the initializer of the entry array. +llvm::Constant *getEntryArrayBegin(llvm::Module &module, + ArrayRef entries) { + // If there are no entries return a constant zero initializer. + llvm::ArrayType *arrayTy = getEntryArrayType(module, entries.size()); + return entries.empty() ? llvm::ConstantAggregateZero::get(arrayTy) + : llvm::ConstantArray::get(arrayTy, entries); +} + +/// Computes the end position of the entry array. +llvm::Constant *getEntryArrayEnd(llvm::Module &module, + llvm::GlobalVariable *begin, size_t numElems) { + llvm::Type *intTy = module.getDataLayout().getIntPtrType(module.getContext()); + return llvm::ConstantExpr::getGetElementPtr( + llvm::offloading::getEntryTy(module), begin, + ArrayRef({llvm::ConstantInt::get(intTy, numElems)}), + true); +} +} // namespace + +OffloadHandler::OffloadEntryArray +OffloadHandler::getEntryArray(StringRef suffix) { + llvm::GlobalVariable *beginGV = + module.getGlobalVariable(getBeginSymbol(suffix), true); + llvm::GlobalVariable *endGV = + module.getGlobalVariable(getEndSymbol(suffix), true); + return {beginGV, endGV}; +} + +OffloadHandler::OffloadEntryArray +OffloadHandler::emitEmptyEntryArray(StringRef suffix) { + llvm::ArrayType *arrayTy = getEntryArrayType(module, 0); + auto *beginGV = new llvm::GlobalVariable( + module, arrayTy, /*isConstant=*/true, llvm::GlobalValue::InternalLinkage, + getEntryArrayBegin(module, {}), getBeginSymbol(suffix)); + auto *endGV = new llvm::GlobalVariable( + module, llvm::PointerType::get(module.getContext(), 0), + /*isConstant=*/true, llvm::GlobalValue::InternalLinkage, + getEntryArrayEnd(module, beginGV, 0), getEndSymbol(suffix)); + return {beginGV, endGV}; +} + +LogicalResult OffloadHandler::insertOffloadEntry(StringRef suffix, + llvm::Constant *entry) { + // Get the begin and end symbols to the entry array. + std::string beginSymId = getBeginSymbol(suffix); + llvm::GlobalVariable *beginGV = module.getGlobalVariable(beginSymId, true); + llvm::GlobalVariable *endGV = + module.getGlobalVariable(getEndSymbol(suffix), true); + // Fail if the symbols are missing. + if (!beginGV || !endGV) + return failure(); + // Create the entry initializer. + assert(beginGV->getInitializer() && "entry array initializer is missing."); + // Add existing entries into the new entry array. + SmallVector entries; + if (auto beginInit = dyn_cast_or_null( + beginGV->getInitializer())) { + for (unsigned i = 0; i < beginInit->getNumOperands(); ++i) + entries.push_back(beginInit->getOperand(i)); + } + // Add the new entry. + entries.push_back(entry); + // Create a global holding the new updated set of entries. + auto *arrayTy = llvm::ArrayType::get(llvm::offloading::getEntryTy(module), + entries.size()); + auto *entryArr = new llvm::GlobalVariable( + module, arrayTy, /*isConstant=*/true, llvm::GlobalValue::InternalLinkage, + getEntryArrayBegin(module, entries), beginSymId, endGV); + // Replace the old entry array variable withe new one. + beginGV->replaceAllUsesWith(entryArr); + beginGV->eraseFromParent(); + entryArr->setName(beginSymId); + // Update the end symbol. + endGV->setInitializer(getEntryArrayEnd(module, entryArr, entries.size())); + return success(); +} diff --git a/mlir/lib/Target/LLVMIR/Dialect/GPU/CMakeLists.txt b/mlir/lib/Target/LLVMIR/Dialect/GPU/CMakeLists.txt index 11816ff5c2c1f..b95b1e95a039b 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/GPU/CMakeLists.txt +++ b/mlir/lib/Target/LLVMIR/Dialect/GPU/CMakeLists.txt @@ -1,14 +1,17 @@ add_mlir_translation_library(MLIRGPUToLLVMIRTranslation GPUToLLVMIRTranslation.cpp - SelectObjectAttr.cpp + OffloadingTranslationAttrs.cpp LINK_COMPONENTS Core + FrontendOffloading + Object LINK_LIBS PUBLIC MLIRIR MLIRGPUDialect MLIRLLVMDialect MLIRSupport + MLIRTargetLLVM MLIRTargetLLVMIRExport ) diff --git a/mlir/lib/Target/LLVMIR/Dialect/GPU/SelectObjectAttr.cpp b/mlir/lib/Target/LLVMIR/Dialect/GPU/OffloadingTranslationAttrs.cpp similarity index 54% rename from mlir/lib/Target/LLVMIR/Dialect/GPU/SelectObjectAttr.cpp rename to mlir/lib/Target/LLVMIR/Dialect/GPU/OffloadingTranslationAttrs.cpp index 0eb33287d608b..4448b72615e21 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/GPU/SelectObjectAttr.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/GPU/OffloadingTranslationAttrs.cpp @@ -25,6 +25,9 @@ using namespace mlir; +//===----------------------------------------------------------------------===// +// SelectObjectAttr +//===----------------------------------------------------------------------===// namespace { // Implementation of the `OffloadingLLVMTranslationAttrInterface` model. class SelectObjectAttrImpl @@ -54,13 +57,6 @@ std::string getBinaryIdentifier(StringRef binaryName) { } } // namespace -void mlir::gpu::registerOffloadingLLVMTranslationInterfaceExternalModels( - DialectRegistry ®istry) { - registry.addExtension(+[](MLIRContext *ctx, gpu::GPUDialect *dialect) { - SelectObjectAttr::attachInterface(*ctx); - }); -} - gpu::ObjectAttr SelectObjectAttrImpl::getSelectedObject(gpu::BinaryOp op) const { ArrayRef objects = op.getObjectsAttr().getValue(); @@ -136,6 +132,9 @@ class LaunchKernel { // Get the kernel launch callee. FunctionCallee getKernelLaunchFn(); + // Get the kernel RT launch callee. + FunctionCallee getKernelRTLaunchFn(); + // Get the kernel launch callee. FunctionCallee getClusterKernelLaunchFn(); @@ -166,9 +165,15 @@ class LaunchKernel { // Create the void* kernel array for passing the arguments. Value *createKernelArgArray(mlir::gpu::LaunchFuncOp op); + // Returns a pair containing the function pointer to the kernel and the + // pointer to the kernel module. + mlir::FailureOr> + getKernelInfo(mlir::gpu::LaunchFuncOp op, mlir::gpu::ObjectAttr object); + // Create the full kernel launch. mlir::LogicalResult createKernelLaunch(mlir::gpu::LaunchFuncOp op, - mlir::gpu::ObjectAttr object); + mlir::gpu::ObjectAttr object, + Value *kernelPtr = nullptr); private: Module &module; @@ -244,6 +249,16 @@ llvm::FunctionCallee llvm::LaunchKernel::getClusterKernelLaunchFn() { false)); } +llvm::FunctionCallee llvm::LaunchKernel::getKernelRTLaunchFn() { + return module.getOrInsertFunction( + "mgpuLaunchKernelRT", + FunctionType::get(voidTy, + ArrayRef({ptrTy, intPtrTy, intPtrTy, intPtrTy, + intPtrTy, intPtrTy, intPtrTy, i32Ty, + ptrTy, ptrTy, ptrTy, i64Ty}), + false)); +} + llvm::FunctionCallee llvm::LaunchKernel::getModuleFunctionFn() { return module.getOrInsertFunction( "mgpuModuleGetFunction", @@ -334,46 +349,14 @@ llvm::LaunchKernel::createKernelArgArray(mlir::gpu::LaunchFuncOp op) { return argArray; } -// Emits LLVM IR to launch a kernel function: +// Loads the kernel module pointer // %0 = call %binarygetter // %1 = call %moduleLoad(%0) // %2 = // %3 = call %moduleGetFunction(%1, %2) -// %4 = call %streamCreate() -// %5 = -// call %launchKernel(%3, , 0, %4, %5, nullptr) -// call %streamSynchronize(%4) -// call %streamDestroy(%4) -// call %moduleUnload(%1) -mlir::LogicalResult -llvm::LaunchKernel::createKernelLaunch(mlir::gpu::LaunchFuncOp op, - mlir::gpu::ObjectAttr object) { - auto llvmValue = [&](mlir::Value value) -> Value * { - Value *v = moduleTranslation.lookupValue(value); - assert(v && "Value has not been translated."); - return v; - }; - - // Get grid dimensions. - mlir::gpu::KernelDim3 grid = op.getGridSizeOperandValues(); - Value *gx = llvmValue(grid.x), *gy = llvmValue(grid.y), - *gz = llvmValue(grid.z); - - // Get block dimensions. - mlir::gpu::KernelDim3 block = op.getBlockSizeOperandValues(); - Value *bx = llvmValue(block.x), *by = llvmValue(block.y), - *bz = llvmValue(block.z); - - // Get dynamic shared memory size. - Value *dynamicMemorySize = nullptr; - if (mlir::Value dynSz = op.getDynamicSharedMemorySize()) - dynamicMemorySize = llvmValue(dynSz); - else - dynamicMemorySize = ConstantInt::get(i32Ty, 0); - - // Create the argument array. - Value *argArray = createKernelArgArray(op); - +mlir::FailureOr> +llvm::LaunchKernel::getKernelInfo(mlir::gpu::LaunchFuncOp op, + mlir::gpu::ObjectAttr object) { // Default JIT optimization level. llvm::Constant *optV = llvm::ConstantInt::get(i32Ty, 0); // Check if there's an optimization level embedded in the object. @@ -385,7 +368,6 @@ llvm::LaunchKernel::createKernelLaunch(mlir::gpu::LaunchFuncOp op, return op.emitError("the optimization level must be an integer"); optV = llvm::ConstantInt::get(i32Ty, optLevel.getValue()); } - // Load the kernel module. StringRef moduleName = op.getKernelModuleName().getValue(); std::string binaryIdentifier = getBinaryIdentifier(moduleName); @@ -417,6 +399,56 @@ llvm::LaunchKernel::createKernelLaunch(mlir::gpu::LaunchFuncOp op, getModuleFunctionFn(), {moduleObject, getOrCreateFunctionName(moduleName, op.getKernelName().getValue())}); + return std::pair(moduleFunction, moduleObject); +} + +// Emits LLVM IR to launch a kernel function: +// %4 = call %streamCreate() +// %5 = +// call %launchKernel(%3, , 0, %4, %5, nullptr) +// call %streamSynchronize(%4) +// call %streamDestroy(%4) +// call %moduleUnload(%1) +mlir::LogicalResult +llvm::LaunchKernel::createKernelLaunch(mlir::gpu::LaunchFuncOp op, + mlir::gpu::ObjectAttr object, + Value *kernelPtr) { + auto llvmValue = [&](mlir::Value value) -> Value * { + Value *v = moduleTranslation.lookupValue(value); + assert(v && "Value has not been translated."); + return v; + }; + + // Get grid dimensions. + mlir::gpu::KernelDim3 grid = op.getGridSizeOperandValues(); + Value *gx = llvmValue(grid.x), *gy = llvmValue(grid.y), + *gz = llvmValue(grid.z); + + // Get block dimensions. + mlir::gpu::KernelDim3 block = op.getBlockSizeOperandValues(); + Value *bx = llvmValue(block.x), *by = llvmValue(block.y), + *bz = llvmValue(block.z); + + // Get dynamic shared memory size. + Value *dynamicMemorySize = nullptr; + if (mlir::Value dynSz = op.getDynamicSharedMemorySize()) + dynamicMemorySize = llvmValue(dynSz); + else + dynamicMemorySize = ConstantInt::get(i32Ty, 0); + + // Create the argument array. + Value *argArray = createKernelArgArray(op); + + Value *moduleObject = nullptr, *moduleFunction = nullptr; + + if (!kernelPtr) { + mlir::FailureOr> kernelInfo = + getKernelInfo(op, object); + if (failed(kernelInfo)) + return failure(); + moduleFunction = kernelInfo->first; + moduleObject = kernelInfo->second; + } // Get the stream to use for execution. If there's no async object then create // a stream to make a synchronous kernel launch. @@ -436,19 +468,27 @@ llvm::LaunchKernel::createKernelLaunch(mlir::gpu::LaunchFuncOp op, Value *nullPtr = ConstantPointerNull::get(ptrTy); // Launch kernel with clusters if cluster size is specified. - if (op.hasClusterSize()) { - mlir::gpu::KernelDim3 cluster = op.getClusterSizeOperandValues(); - Value *cx = llvmValue(cluster.x), *cy = llvmValue(cluster.y), - *cz = llvmValue(cluster.z); - builder.CreateCall( - getClusterKernelLaunchFn(), - ArrayRef({moduleFunction, cx, cy, cz, gx, gy, gz, bx, by, bz, - dynamicMemorySize, stream, argArray, nullPtr})); + if (moduleFunction) { + if (op.hasClusterSize()) { + mlir::gpu::KernelDim3 cluster = op.getClusterSizeOperandValues(); + Value *cx = llvmValue(cluster.x), *cy = llvmValue(cluster.y), + *cz = llvmValue(cluster.z); + builder.CreateCall( + getClusterKernelLaunchFn(), + ArrayRef({moduleFunction, cx, cy, cz, gx, gy, gz, bx, by, bz, + dynamicMemorySize, stream, argArray, nullPtr})); + } else { + builder.CreateCall(getKernelLaunchFn(), + ArrayRef({moduleFunction, gx, gy, gz, bx, by, + bz, dynamicMemorySize, stream, + argArray, nullPtr, paramsCount})); + } } else { - builder.CreateCall(getKernelLaunchFn(), - ArrayRef({moduleFunction, gx, gy, gz, bx, by, - bz, dynamicMemorySize, stream, - argArray, nullPtr, paramsCount})); + assert(kernelPtr && "invalid kernel pointer"); + builder.CreateCall( + getKernelRTLaunchFn(), + ArrayRef({kernelPtr, gx, gy, gz, bx, by, bz, dynamicMemorySize, + stream, argArray, nullPtr, paramsCount})); } // Sync & destroy the stream, for synchronous launches. @@ -458,7 +498,285 @@ llvm::LaunchKernel::createKernelLaunch(mlir::gpu::LaunchFuncOp op, } // Unload the kernel module. - builder.CreateCall(getModuleUnloadFn(), {moduleObject}); + if (moduleObject) + builder.CreateCall(getModuleUnloadFn(), {moduleObject}); + + return success(); +} + +//===----------------------------------------------------------------------===// +// OffloadEmbeddingAttr +//===----------------------------------------------------------------------===// +#include "mlir/Target/LLVM/Offload.h" +#include "llvm/Frontend/Offloading/OffloadWrapper.h" +#include "llvm/Frontend/Offloading/Utility.h" +#include "llvm/Object/OffloadBinary.h" + +namespace { +// Implementation of the `OffloadingLLVMTranslationAttrInterface` model. +class OffloadEmbeddingAttrImpl + : public gpu::OffloadingLLVMTranslationAttrInterface::FallbackModel< + OffloadEmbeddingAttrImpl> { +public: + // Translates a `gpu.binary`, embedding the binary into a host LLVM module as + // global binary string. + LogicalResult embedBinary(Attribute attribute, Operation *operation, + llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) const; + + // Translates a `gpu.launch_func` to a sequence of LLVM instructions resulting + // in a kernel launch call. + LogicalResult launchKernel(Attribute attribute, + Operation *launchFuncOperation, + Operation *binaryOperation, + llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) const; +}; +} // namespace + +namespace { +llvm::object::ImageKind getImageKind(gpu::CompilationTarget format) { + switch (format) { + case gpu::CompilationTarget::Offload: + return llvm::object::IMG_Bitcode; + case gpu::CompilationTarget::Assembly: + return llvm::object::IMG_PTX; + case gpu::CompilationTarget::Binary: + return llvm::object::IMG_Object; + case gpu::CompilationTarget::Fatbin: + return llvm::object::IMG_Fatbinary; + } +} + +llvm::object::OffloadKind getOffloadKind(gpu::OffloadKind offloadKind) { + switch (offloadKind) { + case gpu::OffloadKind::OpenMP: + return llvm::object::OFK_OpenMP; + case gpu::OffloadKind::CUDA: + return llvm::object::OFK_Cuda; + case gpu::OffloadKind::HIP: + return llvm::object::OFK_HIP; + } +} + +using OffloadEntryArray = LLVM::OffloadHandler::OffloadEntryArray; + +/// Utility class for embedding binaries and launching kernels using the +/// offloading attribute. +class OffloadManager : public LLVM::OffloadHandler { +public: + OffloadManager(gpu::BinaryOp binaryOp, llvm::Module &module, + gpu::OffloadKind offloadKind) + : LLVM::OffloadHandler(module), binaryOp(binaryOp), + offloadKind(offloadKind) {} + + /// Embed a GPU binary into a module. + LogicalResult embedBinary(); + + /// Generates the kernel launch call. + LogicalResult launchKernel(gpu::LaunchFuncOp launchFunc, + llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation); + +protected: + /// Returns the name to be used for the offloading symbols. + StringRef getSymbolSuffix(); + + /// Emits the offloading entry for `launchFunc`. + LogicalResult emitOffloadingEntry(gpu::LaunchFuncOp launchFunc, + llvm::Constant *registeredSym); + + /// Bundle OpenMP images together. + SmallVector> + bundleOpenMP(ArrayRef objects); + + /// Bundle gpu-objects together. TODO: support more than a single object. + FailureOr>> + bundleGPU(ArrayRef objects); + + /// Bundle objects depending on the `gpu::OffloadKind`. + FailureOr>> + bundleImages(ArrayRef objects); + + /// Emit registration code and embed the images. + LogicalResult wrapImages(llvm::Module &module, ArrayRef> imgs); + + /// Convert a `ObjectAttr` to a OffloadingImage. + llvm::object::OffloadBinary::OffloadingImage + getOffloadingImage(gpu::ObjectAttr obj); + gpu::BinaryOp binaryOp; + gpu::OffloadKind offloadKind; +}; +} // namespace + +llvm::object::OffloadBinary::OffloadingImage +OffloadManager::getOffloadingImage(gpu::ObjectAttr obj) { + // Create the binary used by Libomptarget + auto targetAttr = cast(obj.getTarget()); + llvm::object::OffloadBinary::OffloadingImage imageBinary{}; + imageBinary.TheImageKind = getImageKind(obj.getFormat()); + imageBinary.TheOffloadKind = getOffloadKind(offloadKind); + imageBinary.StringData["triple"] = targetAttr.getTargetTriple(); + imageBinary.StringData["arch"] = targetAttr.getTargetChip(); + imageBinary.Image = + llvm::MemoryBuffer::getMemBufferCopy(obj.getObject().getValue(), ""); + return imageBinary; +} + +SmallVector> +OffloadManager::bundleOpenMP(ArrayRef objects) { + // Bundle all the available objects in the binary. + SmallVector> buffers; + for (Attribute attr : objects) + buffers.emplace_back( + llvm::MemoryBuffer::getMemBufferCopy(llvm::object::OffloadBinary::write( + getOffloadingImage(cast(attr))))); + return buffers; +} + +FailureOr>> +OffloadManager::bundleGPU(ArrayRef objects) { + if (objects.size() > 1) + return binaryOp.emitError("multiple objects are not yet supported"); + SmallVector> buffers; + assert(objects.size() == 1 && "there should be a single object"); + auto object = cast(objects[0]); + if (gpu::CompilationTarget frmt = object.getFormat(); + frmt != gpu::CompilationTarget::Binary && + frmt != gpu::CompilationTarget::Fatbin) + return binaryOp.emitError( + "the only supported objects are binaries and fat-binaries."); + buffers.emplace_back( + llvm::MemoryBuffer::getMemBuffer(object.getObject().getValue())); + return buffers; +} + +FailureOr>> +OffloadManager::bundleImages(ArrayRef objects) { + switch (offloadKind) { + case gpu::OffloadKind::OpenMP: + return bundleOpenMP(objects); + case gpu::OffloadKind::CUDA: + case gpu::OffloadKind::HIP: + return bundleGPU(objects); + } +} + +StringRef OffloadManager::getSymbolSuffix() { return binaryOp.getName(); } + +LogicalResult +OffloadManager::emitOffloadingEntry(gpu::LaunchFuncOp launchFunc, + llvm::Constant *registeredSym) { + // Create the entry initializer. + std::pair entry = + llvm::offloading::getOffloadingEntryInitializer( + module, registeredSym, launchFunc.getKernelName().getValue(), 0, 0, + 0); + if (failed(insertOffloadEntry(getSymbolSuffix(), entry.first))) + return binaryOp.emitError("entry array symbols not found"); + return success(); +} + +LogicalResult OffloadManager::wrapImages(llvm::Module &module, + ArrayRef> imgs) { + // This suffix is appended to all the symbols emitted by the `wrap*` methods. + std::string suffix = "." + getSymbolSuffix().str(); + // Emit an empty entry array. + OffloadEntryArray entryArray = emitEmptyEntryArray(getSymbolSuffix()); + switch (offloadKind) { + case gpu::OffloadKind::OpenMP: + if (auto error = llvm::offloading::wrapOpenMPBinaries(module, imgs, + entryArray, suffix)) + return binaryOp.emitError("failed wrapping the OpenMP binaries"); + return success(); + case gpu::OffloadKind::CUDA: + if (auto error = llvm::offloading::wrapCudaBinary( + module, imgs.front(), entryArray, suffix, false)) + return binaryOp.emitError("failed wrapping the CUDA binaries"); + return success(); + case gpu::OffloadKind::HIP: + if (auto error = llvm::offloading::wrapHIPBinary(module, imgs.front(), + entryArray, suffix, false)) + return binaryOp.emitError("failed wrapping the HIP binaries"); + return success(); + } +} + +LogicalResult OffloadManager::embedBinary() { + // Call all the methods in order, bundleImages -> wrapImages. + auto bundledImgs = bundleImages(binaryOp.getObjectsAttr().getValue()); + if (failed(bundledImgs)) + return failure(); + SmallVector> imgs; + for (auto &img : bundledImgs.value()) + imgs.push_back(ArrayRef(img->getBufferStart(), img->getBufferSize())); + if (failed(wrapImages(module, imgs))) + return failure(); + return success(); +} + +LogicalResult +OffloadManager::launchKernel(gpu::LaunchFuncOp launchFunc, + llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { + // OpenMMP kernels launches are handled by the `omp.target` op. + if (offloadKind == gpu::OffloadKind::OpenMP) + return binaryOp.emitError( + "it's invalid to call OpenMP kernels using gpu.launch_func"); + llvm::Module *hostModule = moduleTranslation.getLLVMModule(); + // Create or get the symbol to be registered. + std::string symbolId = + (binaryOp.getName() + "_K" + launchFunc.getKernelName().getValue()).str(); + llvm::Constant *registeredSym = nullptr; + if (!(registeredSym = hostModule->getGlobalVariable(symbolId))) { + // Create the symbol used to register the kernel with the runtime. + registeredSym = new llvm::GlobalVariable( + *hostModule, builder.getInt8Ty(), /*isConstant=*/true, + llvm::GlobalValue::InternalLinkage, builder.getInt8(0), symbolId); + // Emit the offload entry. + if (failed(emitOffloadingEntry(launchFunc, registeredSym))) + return failure(); + } + return llvm::LaunchKernel(*moduleTranslation.getLLVMModule(), builder, + moduleTranslation) + .createKernelLaunch(launchFunc, nullptr, registeredSym); +} + +LogicalResult OffloadEmbeddingAttrImpl::embedBinary( + Attribute attribute, Operation *operation, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) const { + if (failed(OffloadManager( + mlir::cast(operation), + *moduleTranslation.getLLVMModule(), + mlir::cast(attribute).getKind()) + .embedBinary())) + return failure(); + return success(); +} + +LogicalResult OffloadEmbeddingAttrImpl::launchKernel( + Attribute attribute, Operation *launchFuncOperation, + Operation *binaryOperation, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) const { + if (failed( + OffloadManager( + mlir::cast(binaryOperation), + *moduleTranslation.getLLVMModule(), + mlir::cast(attribute).getKind()) + .launchKernel(mlir::cast(launchFuncOperation), + builder, moduleTranslation))) + return failure(); return success(); } + +//===----------------------------------------------------------------------===// +// Interface registration +//===----------------------------------------------------------------------===// +void mlir::gpu::registerOffloadingLLVMTranslationInterfaceExternalModels( + DialectRegistry ®istry) { + registry.addExtension(+[](MLIRContext *ctx, gpu::GPUDialect *dialect) { + SelectObjectAttr::attachInterface(*ctx); + OffloadEmbeddingAttr::attachInterface(*ctx); + }); +} diff --git a/mlir/test/Target/LLVMIR/gpu.mlir b/mlir/test/Target/LLVMIR/gpu.mlir index 88672bd231df8..74dfa53558d71 100644 --- a/mlir/test/Target/LLVMIR/gpu.mlir +++ b/mlir/test/Target/LLVMIR/gpu.mlir @@ -101,3 +101,86 @@ module attributes {gpu.container_module} { llvm.return } } + +// ----- + +// Test the `offload_embedding` attribute. +module attributes {gpu.container_module} { + // CHECK: @__begin_offload_kernel_module = internal constant [1 x %{{.*}}] [%{{.*}} { ptr @[[KERNEL_SYMBOL:.*]], ptr @[[ENTRY_NAME:.*]], i64 0, i32 0, i32 0 }] + // CHECK: @__end_offload_kernel_module = internal constant ptr getelementptr inbounds (%{{.*}}, ptr @__begin_offload_kernel_module, i64 1) + // CHECK: @[[FATBIN:.*]] = internal constant [4 x i8] c"BLOB", section ".nv_fatbin" + // CHECK: @[[FATBIN_HANDLE:.*]] = internal constant %{{.*}} { i32 1180844977, i32 1, ptr @[[FATBIN]] + // CHECK: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @[[REGISTRATION_CTOR:.*]], ptr null }] + // CHECK: @[[KERNEL_SYMBOL]] = internal constant i8 0 + // CHECK-NEXT: @[[ENTRY_NAME]] = internal unnamed_addr constant [7 x i8] c"kernel\00" + gpu.binary @kernel_module <#gpu.offload_embedding> [#gpu.object<#nvvm.target, bin = "BLOB">] + llvm.func @foo() { + // CHECK: [[ARGS:%.*]] = alloca %{{.*}}, align 8 + // CHECK-NEXT: [[ARGS_ARRAY:%.*]] = alloca ptr, i64 2, align 8 + // CHECK-NEXT: [[ARG0:%.*]] = getelementptr inbounds [[ARGS_TY]], ptr [[ARGS]], i32 0, i32 0 + // CHECK-NEXT: store i32 32, ptr [[ARG0]], align 4 + // CHECK-NEXT: %{{.*}} = getelementptr ptr, ptr [[ARGS_ARRAY]], i32 0 + // CHECK-NEXT: store ptr [[ARG0]], ptr %{{.*}}, align 8 + // CHECK-NEXT: [[ARG1:%.*]] = getelementptr inbounds [[ARGS_TY]], ptr [[ARGS]], i32 0, i32 1 + // CHECK-NEXT: store i32 32, ptr [[ARG1]], align 4 + // CHECK-NEXT: %{{.*}} = getelementptr ptr, ptr [[ARGS_ARRAY]], i32 1 + // CHECK-NEXT: store ptr [[ARG1]], ptr %{{.*}}, align 8 + // CHECK-NEXT: [[STREAM:%.*]] = call ptr @mgpuStreamCreate() + // CHECK-NEXT: call void @mgpuLaunchKernelRT(ptr @[[KERNEL_SYMBOL]], i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i32 256, ptr [[STREAM]], ptr [[ARGS_ARRAY]], ptr null, i64 2) + // CHECK-NEXT: call void @mgpuStreamSynchronize(ptr [[STREAM]]) + // CHECK-NEXT: call void @mgpuStreamDestroy(ptr [[STREAM]]) + %0 = llvm.mlir.constant(8 : index) : i64 + %1 = llvm.mlir.constant(32 : i32) : i32 + %2 = llvm.mlir.constant(256 : i32) : i32 + gpu.launch_func @kernel_module::@kernel blocks in (%0, %0, %0) threads in (%0, %0, %0) : i64 dynamic_shared_memory_size %2 args(%1 : i32, %1 : i32) + llvm.return + } + // CHECK: define internal void @[[REGISTRATION_CTOR]] + // CHECK: %{{.*}} = call ptr @__cudaRegisterFatBinary(ptr @[[FATBIN_HANDLE]]) +} + +// ----- + +// Test the `offload_embedding` attribute. +module attributes {gpu.container_module} { + // CHECK: @__begin_offload_kernel_module = internal constant [2 x %{{.*}}] [ + // CHECK: %{{.*}} { ptr @[[KERNEL_1_SYMBOL:.*]], ptr @[[ENTRY_NAME_1:.*]], i64 0, i32 0, i32 0 }, + // CHECK: %{{.*}} { ptr @[[KERNEL_2_SYMBOL:.*]], ptr @[[ENTRY_NAME_2:.*]], i64 0, i32 0, i32 0 }] + // CHECK: @__end_offload_kernel_module = internal constant ptr getelementptr inbounds (%{{.*}}, ptr @__begin_offload_kernel_module, i64 2) + // CHECK: @[[FATBIN:.*]] = internal constant [4 x i8] c"BLOB", section ".hip_fatbin" + // CHECK: @[[FATBIN_HANDLE:.*]] = internal constant %{{.*}} { i32 1212764230, i32 1, ptr @[[FATBIN]] + // CHECK: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @[[REGISTRATION_CTOR:.*]], ptr null }] + // CHECK: @[[KERNEL_1_SYMBOL]] = internal constant i8 0 + // CHECK-NEXT: @[[ENTRY_NAME_1]] = internal unnamed_addr constant [9 x i8] c"kernel_1\00" + // CHECK: @[[KERNEL_2_SYMBOL]] = internal constant i8 0 + // CHECK-NEXT: @[[ENTRY_NAME_2]] = internal unnamed_addr constant [9 x i8] c"kernel_2\00" + gpu.binary @kernel_module <#gpu.offload_embedding> [#gpu.object<#rocdl.target, bin = "BLOB">] + llvm.func @foo() { + %0 = llvm.mlir.constant(8 : index) : i64 + %1 = llvm.mlir.constant(32 : i32) : i32 + %2 = llvm.mlir.constant(256 : i32) : i32 + gpu.launch_func @kernel_module::@kernel_1 blocks in (%0, %0, %0) threads in (%0, %0, %0) : i64 dynamic_shared_memory_size %2 args(%1 : i32, %1 : i32) + gpu.launch_func @kernel_module::@kernel_2 blocks in (%0, %0, %0) threads in (%0, %0, %0) : i64 dynamic_shared_memory_size %2 args(%1 : i32, %1 : i32) + llvm.return + } + // CHECK: define internal void @[[REGISTRATION_CTOR]] + // CHECK: %{{.*}} = call ptr @__hipRegisterFatBinary(ptr @[[FATBIN_HANDLE]]) +} + +// ----- + +// Test the `offload_embedding` attribute. +module attributes {gpu.container_module} { + // CHECK: @__begin_offload_kernel_module = internal constant [0 x %{{.*}}] zeroinitializer + // CHECK: @__end_offload_kernel_module = internal constant ptr @__begin_offload_kernel_module + // CHECK: @[[BINARY:.*]] = internal unnamed_addr constant [{{.*}} x i8] c"{{.*}}", section ".llvm.offloading", align 8 + // CHECK: @[[BINARIES:.*]] = internal unnamed_addr constant [1 x %{{.*}}] [%{{.*}} { ptr getelementptr inbounds ([{{.*}} x i8], ptr @[[BINARY]], i64 0, i64 {{.*}}), ptr getelementptr inbounds ([{{.*}} x i8], ptr @[[BINARY]], i64 0, i64 {{.*}}), ptr @__begin_offload_kernel_module, ptr @__end_offload_kernel_module }] + // CHECK: @[[DESCRIPTOR:.*]] = internal constant %{{.*}} { i32 1, ptr @[[BINARIES]], ptr @__begin_offload_kernel_module, ptr @__end_offload_kernel_module } + // CHECK: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @[[REGISTRATION_CTOR:.*]], ptr null }] + // CHECK: @llvm.global_dtors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @[[REGISTRATION_DTOR:.*]], ptr null }] + gpu.binary @kernel_module <#gpu.offload_embedding> [#gpu.object<#rocdl.target, bin = "BLOB">] + // CHECK: define internal void @[[REGISTRATION_CTOR]] + // CHECK: call {{.*}} @__tgt_register_lib(ptr @[[DESCRIPTOR]]) + // CHECK: define internal void @[[REGISTRATION_DTOR]] + // CHECK: call {{.*}} @__tgt_unregister_lib(ptr @[[DESCRIPTOR]]) +} diff --git a/mlir/unittests/Target/LLVM/CMakeLists.txt b/mlir/unittests/Target/LLVM/CMakeLists.txt index 6d612548a94c0..d04f38ddddfac 100644 --- a/mlir/unittests/Target/LLVM/CMakeLists.txt +++ b/mlir/unittests/Target/LLVM/CMakeLists.txt @@ -1,4 +1,5 @@ add_mlir_unittest(MLIRTargetLLVMTests + Offload.cpp SerializeNVVMTarget.cpp SerializeROCDLTarget.cpp SerializeToLLVMBitcode.cpp diff --git a/mlir/unittests/Target/LLVM/Offload.cpp b/mlir/unittests/Target/LLVM/Offload.cpp new file mode 100644 index 0000000000000..375edc2e9614d --- /dev/null +++ b/mlir/unittests/Target/LLVM/Offload.cpp @@ -0,0 +1,49 @@ +//===- Offload.cpp ----------------------------------------------*- C++ -*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Target/LLVM/Offload.h" +#include "llvm/Frontend/Offloading/Utility.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Module.h" + +#include "gmock/gmock.h" + +using namespace llvm; + +TEST(MLIRTarget, OffloadAPI) { + using OffloadEntryArray = mlir::LLVM::OffloadHandler::OffloadEntryArray; + LLVMContext llvmContext; + Module llvmModule("offload", llvmContext); + mlir::LLVM::OffloadHandler handler(llvmModule); + StringRef suffix = ".mlir"; + // Check there's no entry array with `.mlir` suffix. + OffloadEntryArray entryArray = handler.getEntryArray(suffix); + EXPECT_EQ(entryArray, OffloadEntryArray()); + // Emit the entry array. + handler.emitEmptyEntryArray(suffix); + // Check there's an entry array with `.mlir` suffix. + entryArray = handler.getEntryArray(suffix); + ASSERT_NE(entryArray.first, nullptr); + ASSERT_NE(entryArray.second, nullptr); + // Check the array contains no entries. + auto *zeroInitializer = dyn_cast_or_null( + entryArray.first->getInitializer()); + ASSERT_NE(zeroInitializer, nullptr); + // Insert an empty entries. + auto emptyEntry = + ConstantAggregateZero::get(offloading::getEntryTy(llvmModule)); + ASSERT_TRUE(succeeded(handler.insertOffloadEntry(suffix, emptyEntry))); + // Check there's an entry in the entry array with `.mlir` suffix. + entryArray = handler.getEntryArray(suffix); + ASSERT_NE(entryArray.first, nullptr); + Constant *arrayInitializer = entryArray.first->getInitializer(); + ASSERT_NE(arrayInitializer, nullptr); + auto *arrayTy = dyn_cast_or_null(arrayInitializer->getType()); + ASSERT_NE(arrayTy, nullptr); + EXPECT_EQ(arrayTy->getNumElements(), 1u); +}