Skip to content

[mlir][gpu] Add the OffloadEmbeddingAttr offloading translation attr #78117

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions mlir/include/mlir/Dialect/GPU/IR/CompilationAttrs.td
Original file line number Diff line number Diff line change
Expand Up @@ -107,4 +107,39 @@ def GPU_SelectObjectAttr : GPU_Attr<"SelectObject", "select_object", [
let genVerifyDecl = 1;
}

//===----------------------------------------------------------------------===//
// GPU LLVM offload attribute.
//===----------------------------------------------------------------------===//
def GPU_OffloadOpenMP : I32EnumAttrCase<"OpenMP", 1, "omp">;
def GPU_OffloadCUDA : I32EnumAttrCase<"CUDA", 2, "cuda">;
def GPU_OffloadHIP : I32EnumAttrCase<"HIP", 3, "hip">;
def GPU_OffloadKindEnum : GPU_I32Enum<
"OffloadKind", "GPU offload kind", [
GPU_OffloadOpenMP,
GPU_OffloadCUDA,
GPU_OffloadHIP
]>;

def GPU_OffloadEmbeddingAttr : GPU_Attr<"OffloadEmbedding", "offload_embedding", [
OffloadingTranslationAttrTrait
]> {
let description = [{
This GPU offloading handler uses LLVM offloading infrastructure to embed GPU
binaries in the IR. At program start, the LLVM offloading mechanism registers
kernels and variables with the runtime library: CUDA RT, HIP RT or
LibOMPTarget.
The offloading mechanism relies on the runtime library to dispatch the
correct kernel based on the registered symbols.
This offload mechanism requires to specify which runtime is being called,
this is done by the `kind` parameter.
Example:
```mlir
gpu.binary @binary <#gpu.offload_embedding<omp>> [...]
gpu.binary @binary <#gpu.offload_embedding<cuda>> [...]
```
}];
let parameters = (ins "gpu::OffloadKind":$kind);
let assemblyFormat = [{ `<` $kind `>` }];
}

#endif // GPU_COMPILATION_ATTRS
1 change: 1 addition & 0 deletions mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/IR/Dialect.h"
#include "mlir/IR/OpDefinition.h"
#include "mlir/Interfaces/DataLayoutInterfaces.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"
#include "llvm/IR/IntrinsicsNVPTX.h"

Expand Down
5 changes: 4 additions & 1 deletion mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ include "mlir/IR/EnumAttr.td"
include "mlir/Dialect/GPU/IR/CompilationAttrInterfaces.td"
include "mlir/Dialect/LLVMIR/LLVMOpBase.td"
include "mlir/Interfaces/SideEffectInterfaces.td"
include "mlir/Interfaces/DataLayoutInterfaces.td"
include "mlir/Dialect/LLVMIR/BasicPtxBuilderInterface.td"

def LLVM_PointerGlobal : LLVM_PointerInAddressSpace<1>;
Expand Down Expand Up @@ -1946,7 +1947,9 @@ def NVVM_WgmmaMmaAsyncOp : NVVM_Op<"wgmma.mma_async",
// NVVM target attribute.
//===----------------------------------------------------------------------===//

def NVVM_TargettAttr : NVVM_Attr<"NVVMTarget", "target"> {
def NVVM_TargettAttr : NVVM_Attr<"NVVMTarget", "target", [
DeclareAttrInterfaceMethods<TargetInfoAttrInterface>
]> {
let description = [{
GPU target attribute for controlling compilation of NVIDIA targets. All
parameters decay into default values if not present.
Expand Down
1 change: 1 addition & 0 deletions mlir/include/mlir/Dialect/LLVMIR/ROCDLDialect.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/IR/Dialect.h"
#include "mlir/IR/OpDefinition.h"
#include "mlir/Interfaces/DataLayoutInterfaces.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"

///// Ops /////
Expand Down
6 changes: 4 additions & 2 deletions mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

include "mlir/Dialect/GPU/IR/CompilationAttrInterfaces.td"
include "mlir/Dialect/LLVMIR/LLVMOpBase.td"
include "mlir/Interfaces/DataLayoutInterfaces.td"
include "mlir/Interfaces/SideEffectInterfaces.td"

//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -608,8 +609,9 @@ def ROCDL_CvtSrFp8F32Op :
// ROCDL target attribute.
//===----------------------------------------------------------------------===//

def ROCDL_TargettAttr :
ROCDL_Attr<"ROCDLTarget", "target"> {
def ROCDL_TargettAttr : ROCDL_Attr<"ROCDLTarget", "target", [
DeclareAttrInterfaceMethods<TargetInfoAttrInterface>
]> {
let description = [{
ROCDL target attribute for controlling compilation of AMDGPU targets. All
parameters decay into default values if not present.
Expand Down
33 changes: 33 additions & 0 deletions mlir/include/mlir/Interfaces/DataLayoutInterfaces.td
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,39 @@ def DataLayoutSpecInterface : AttrInterface<"DataLayoutSpecInterface"> {
}];
}

def TargetInfoAttrInterface : AttrInterface<"TargetInfoAttrInterface"> {
let cppNamespace = "::mlir";

let description = [{
Attribute interface describing target information.

Target information attributes provide essential information on the
compilation target. This information includes the target triple identifier,
the target chip identifier, and a string representation of the target features.
}];

let methods = [
InterfaceMethod<
/*description=*/"Returns the target triple identifier.",
/*retTy=*/"::mlir::StringRef",
/*methodName=*/"getTargetTriple",
/*args=*/(ins)
>,
InterfaceMethod<
/*description=*/"Returns the target chip identifier.",
/*retTy=*/"::mlir::StringRef",
/*methodName=*/"getTargetChip",
/*args=*/(ins)
>,
InterfaceMethod<
/*description=*/"Returns the target features as a string.",
/*retTy=*/"std::string",
/*methodName=*/"getTargetFeatures",
/*args=*/(ins)
>
];
}

//===----------------------------------------------------------------------===//
// Operation interface
//===----------------------------------------------------------------------===//
Expand Down
61 changes: 61 additions & 0 deletions mlir/include/mlir/Target/LLVM/Offload.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
//===- Offload.h - LLVM Target Offload --------------------------*- C++ -*-===//
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems like this is doing the same kind of work that the OffloadInfoManager is doing. Would it be possible to refactor that to not have to add this class?

Copy link
Contributor Author

@fabianmcg fabianmcg Feb 16, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Currently, the OffloadInfoManager creates the entries and adds them to the omp_offloading_entries section. However, the OffloadInfoManager performs no explicit construction of the entry array needed by the binary descriptor. It's the linker's job to implicitly create the array using all the entries in the section.

The problem with this approach is that LLJIT doesn't handle the implicit creation of the array very well. To overcome this limitation of LLJIT, the attribute constructs the entry array explicitly.

In summary, this class can be removed up to an extent, but then JIT compilation is impossible, and a real linker is needed to obtain the final executable.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It sounds like Clang isn't able to be used with the LLJIT in that case, or if it does, then there is already a solution in Clang. I think making this work both for Clang and MLIR would be useful. If there is already a solution in Clang then it should be migrated to the OpenMPIRBuilder.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The real problem is the lack of comprehensive support of linker sections in LLJIT, so I wouldn't say clang, or the clang-linker-wrapper are at fault. The easiest solution that I found was complying with LLJIT.
I think @jhuber6 was looking into changing the registration mechanism of LibOMPTarget binaries, so maybe we can found a solution that works for all.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't have the full view of what LLJIT does here, but the use-case in clang is that we need each TU to be able to emit values that need to be registered by the runtime. There are a few alternate solutions to this, but having the linker handle it is the best overall. The rework I was talking about was to simply change the offloading entry struct so it's more generic.

How does LLJIT work exactly? If you put globals into a section they will generally appear in order, so if you had a pointer to the first and last globals in that section you could just traverse it once it's gone through the backend. This is somewhat similar to the COFF linker handling which just gives an object at the beginning and end of the others in that section.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are a few alternate solutions to this, but having the linker handle it is the best overall.

I agree, I think the best solution would be to make LLJIT work.

The rework I was talking about was to simply change the offloading entry struct so it's more generic.

I see.

How does LLJIT work exactly?

Honestly, I'm not 100% sure, I only know that the same IR would work if linked with a regular linker and fail with LLJIT.
I asked around on LLJIT discord a couple months ago why it was not picking out the symbols and they didn't give an answer.

I'll inquire further with them and comeback with a more definitive answer.

//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file declares LLVM target offload utility classes.
//
//===----------------------------------------------------------------------===//

#ifndef MLIR_TARGET_LLVM_OFFLOAD_H
#define MLIR_TARGET_LLVM_OFFLOAD_H

#include "mlir/Support/LogicalResult.h"
#include "llvm/ADT/StringRef.h"

namespace llvm {
class Constant;
class GlobalVariable;
class Module;
} // namespace llvm

namespace mlir {
namespace LLVM {
/// `OffloadHandler` is a utility class for creating LLVM offload entries. LLVM
/// offload entries hold information on offload symbols; for example, for a GPU
/// kernel, this includes its host address to identify the kernel and the kernel
/// identifier in the binary. Arrays of offload entries can be used to register
/// functions within the CUDA/HIP runtime. Libomptarget also uses these entries
/// to register OMP target offload kernels and variables.
class OffloadHandler {
public:
using OffloadEntryArray =
std::pair<llvm::GlobalVariable *, llvm::GlobalVariable *>;
OffloadHandler(llvm::Module &module) : module(module) {}

/// Returns the begin symbol name used in the entry array.
static std::string getBeginSymbol(StringRef suffix);

/// Returns the end symbol name used in the entry array.
static std::string getEndSymbol(StringRef suffix);

/// Returns the entry array if it exists or a pair of null pointers.
OffloadEntryArray getEntryArray(StringRef suffix);

/// Emits an empty array of offloading entries.
OffloadEntryArray emitEmptyEntryArray(StringRef suffix);

/// Inserts an offloading entry into an existing entry array. This method
/// returns failure if the entry array hasn't been declared.
LogicalResult insertOffloadEntry(StringRef suffix, llvm::Constant *entry);

protected:
llvm::Module &module;
};
} // namespace LLVM
} // namespace mlir

#endif // MLIR_TARGET_LLVM_OFFLOAD_H
2 changes: 2 additions & 0 deletions mlir/lib/Dialect/LLVMIR/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ add_mlir_dialect_library(MLIRNVVMDialect
LINK_LIBS PUBLIC
MLIRIR
MLIRLLVMDialect
MLIRDataLayoutInterfaces
MLIRSideEffectInterfaces
)

Expand All @@ -83,5 +84,6 @@ add_mlir_dialect_library(MLIRROCDLDialect
LINK_LIBS PUBLIC
MLIRIR
MLIRLLVMDialect
MLIRDataLayoutInterfaces
MLIRSideEffectInterfaces
)
8 changes: 8 additions & 0 deletions mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1106,6 +1106,14 @@ NVVMTargetAttr::verify(function_ref<InFlightDiagnostic()> emitError,
return success();
}

StringRef NVVMTargetAttr::getTargetTriple() const { return getTriple(); }

StringRef NVVMTargetAttr::getTargetChip() const { return getChip(); }

std::string NVVMTargetAttr::getTargetFeatures() const {
return getFeatures().str();
}

#define GET_OP_CLASSES
#include "mlir/Dialect/LLVMIR/NVVMOps.cpp.inc"

Expand Down
8 changes: 8 additions & 0 deletions mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,14 @@ ROCDLTargetAttr::verify(function_ref<InFlightDiagnostic()> emitError,
return success();
}

StringRef ROCDLTargetAttr::getTargetTriple() const { return getTriple(); }

StringRef ROCDLTargetAttr::getTargetChip() const { return getChip(); }

std::string ROCDLTargetAttr::getTargetFeatures() const {
return getFeatures().str();
}

#define GET_OP_CLASSES
#include "mlir/Dialect/LLVMIR/ROCDLOps.cpp.inc"

Expand Down
2 changes: 2 additions & 0 deletions mlir/lib/Target/LLVM/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
add_mlir_library(MLIRTargetLLVM
ModuleToObject.cpp
Offload.cpp

ADDITIONAL_HEADER_DIRS
${MLIR_MAIN_INCLUDE_DIR}/mlir/Target/LLVM
Expand All @@ -16,6 +17,7 @@ add_mlir_library(MLIRTargetLLVM
Passes
Support
Target
FrontendOffloading
LINK_LIBS PUBLIC
MLIRExecutionEngineUtils
MLIRTargetLLVMIRExport
Expand Down
111 changes: 111 additions & 0 deletions mlir/lib/Target/LLVM/Offload.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
//===- Offload.cpp - LLVM Target Offload ------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines LLVM target offload utility classes.
//
//===----------------------------------------------------------------------===//

#include "mlir/Target/LLVM/Offload.h"
#include "llvm/Frontend/Offloading/Utility.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Module.h"

using namespace mlir;
using namespace mlir::LLVM;

std::string OffloadHandler::getBeginSymbol(StringRef suffix) {
return ("__begin_offload_" + suffix).str();
}

std::string OffloadHandler::getEndSymbol(StringRef suffix) {
return ("__end_offload_" + suffix).str();
}

namespace {
/// Returns the type of the entry array.
llvm::ArrayType *getEntryArrayType(llvm::Module &module, size_t numElems) {
return llvm::ArrayType::get(llvm::offloading::getEntryTy(module), numElems);
}

/// Creates the initializer of the entry array.
llvm::Constant *getEntryArrayBegin(llvm::Module &module,
ArrayRef<llvm::Constant *> entries) {
// If there are no entries return a constant zero initializer.
llvm::ArrayType *arrayTy = getEntryArrayType(module, entries.size());
return entries.empty() ? llvm::ConstantAggregateZero::get(arrayTy)
: llvm::ConstantArray::get(arrayTy, entries);
}

/// Computes the end position of the entry array.
llvm::Constant *getEntryArrayEnd(llvm::Module &module,
llvm::GlobalVariable *begin, size_t numElems) {
llvm::Type *intTy = module.getDataLayout().getIntPtrType(module.getContext());
return llvm::ConstantExpr::getGetElementPtr(
llvm::offloading::getEntryTy(module), begin,
ArrayRef<llvm::Constant *>({llvm::ConstantInt::get(intTy, numElems)}),
true);
}
} // namespace

OffloadHandler::OffloadEntryArray
OffloadHandler::getEntryArray(StringRef suffix) {
llvm::GlobalVariable *beginGV =
module.getGlobalVariable(getBeginSymbol(suffix), true);
llvm::GlobalVariable *endGV =
module.getGlobalVariable(getEndSymbol(suffix), true);
return {beginGV, endGV};
}

OffloadHandler::OffloadEntryArray
OffloadHandler::emitEmptyEntryArray(StringRef suffix) {
llvm::ArrayType *arrayTy = getEntryArrayType(module, 0);
auto *beginGV = new llvm::GlobalVariable(
module, arrayTy, /*isConstant=*/true, llvm::GlobalValue::InternalLinkage,
getEntryArrayBegin(module, {}), getBeginSymbol(suffix));
auto *endGV = new llvm::GlobalVariable(
module, llvm::PointerType::get(module.getContext(), 0),
/*isConstant=*/true, llvm::GlobalValue::InternalLinkage,
getEntryArrayEnd(module, beginGV, 0), getEndSymbol(suffix));
return {beginGV, endGV};
}

LogicalResult OffloadHandler::insertOffloadEntry(StringRef suffix,
llvm::Constant *entry) {
// Get the begin and end symbols to the entry array.
std::string beginSymId = getBeginSymbol(suffix);
llvm::GlobalVariable *beginGV = module.getGlobalVariable(beginSymId, true);
llvm::GlobalVariable *endGV =
module.getGlobalVariable(getEndSymbol(suffix), true);
// Fail if the symbols are missing.
if (!beginGV || !endGV)
return failure();
// Create the entry initializer.
assert(beginGV->getInitializer() && "entry array initializer is missing.");
// Add existing entries into the new entry array.
SmallVector<llvm::Constant *> entries;
if (auto beginInit = dyn_cast_or_null<llvm::ConstantAggregate>(
beginGV->getInitializer())) {
for (unsigned i = 0; i < beginInit->getNumOperands(); ++i)
entries.push_back(beginInit->getOperand(i));
}
// Add the new entry.
entries.push_back(entry);
// Create a global holding the new updated set of entries.
auto *arrayTy = llvm::ArrayType::get(llvm::offloading::getEntryTy(module),
entries.size());
auto *entryArr = new llvm::GlobalVariable(
module, arrayTy, /*isConstant=*/true, llvm::GlobalValue::InternalLinkage,
getEntryArrayBegin(module, entries), beginSymId, endGV);
// Replace the old entry array variable withe new one.
beginGV->replaceAllUsesWith(entryArr);
beginGV->eraseFromParent();
entryArr->setName(beginSymId);
// Update the end symbol.
endGV->setInitializer(getEntryArrayEnd(module, entryArr, entries.size()));
return success();
}
5 changes: 4 additions & 1 deletion mlir/lib/Target/LLVMIR/Dialect/GPU/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
add_mlir_translation_library(MLIRGPUToLLVMIRTranslation
GPUToLLVMIRTranslation.cpp
SelectObjectAttr.cpp
OffloadingTranslationAttrs.cpp

LINK_COMPONENTS
Core
FrontendOffloading
Object

LINK_LIBS PUBLIC
MLIRIR
MLIRGPUDialect
MLIRLLVMDialect
MLIRSupport
MLIRTargetLLVM
MLIRTargetLLVMIRExport
)
Loading