Skip to content

Commit 628c735

Browse files
authored
[MLIR][OpenMP] Add canonical loop operations (#147061)
Add the supporting OpenMP Dialect operations, types, and interfaces for modelling canonical loops. MLIR Operations: * omp.new_cli * omp.canonical_loop MLIR Types: * !omp.cli MLIR Interfaces: * LoopTransformationInterface As a first loop transformation, to be able to use these new operations in follow-up PRs (#144785): * omp.unroll_heuristic
1 parent d3ea7f2 commit 628c735

File tree

8 files changed

+928
-0
lines changed

8 files changed

+928
-0
lines changed

mlir/include/mlir/Dialect/OpenMP/OpenMPDialect.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,4 +37,9 @@
3737
#define GET_OP_CLASSES
3838
#include "mlir/Dialect/OpenMP/OpenMPOps.h.inc"
3939

40+
namespace mlir::omp {
41+
/// Find the omp.new_cli, generator, and consumer of a canonical loop info.
///
/// Takes the canonical-loop-info value \p cli and returns a tuple holding a
/// NewCliOp and two operand references. Presumably the tuple order matches the
/// summary above: the omp.new_cli operation, then the generator's use of the
/// CLI, then the consumer's use of the CLI.
/// NOTE(review): what is returned when the CLI has no generator or no consumer
/// (e.g. null entries) is not visible from this declaration -- confirm against
/// the definition.
42+
std::tuple<NewCliOp, OpOperand *, OpOperand *> decodeCli(mlir::Value cli);
43+
} // namespace mlir::omp
44+
4045
#endif // MLIR_DIALECT_OPENMP_OPENMPDIALECT_H_

mlir/include/mlir/Dialect/OpenMP/OpenMPOpBase.td

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,4 +204,15 @@ class OpenMP_Op<string mnemonic, list<Trait> traits = [],
204204
let regions = !if(singleRegion, (region AnyRegion:$region), (region));
205205
}
206206

207+
208+
// Base class for OpenMP loop transformations (that either consume or generate
209+
// loops)
210+
//
211+
// Doesn't actually create a C++ base class (only defines default values for
212+
// tablegen classes that derive from this). Use LoopTransformationInterface
213+
// instead for common operations.
214+
class OpenMPTransform_Op<string mnemonic, list<Trait> traits = []> :
215+
OpenMP_Op<mnemonic, !listconcat([DeclareOpInterfaceMethods<LoopTransformationInterface>], traits) > {
216+
}
217+
207218
#endif // OPENMP_OP_BASE

mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td

Lines changed: 207 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ include "mlir/Dialect/OpenMP/OpenMPOpBase.td"
2222
include "mlir/Interfaces/ControlFlowInterfaces.td"
2323
include "mlir/Interfaces/SideEffectInterfaces.td"
2424
include "mlir/IR/EnumAttr.td"
25+
include "mlir/IR/OpAsmInterface.td"
2526
include "mlir/IR/OpBase.td"
2627
include "mlir/IR/SymbolInterfaces.td"
2728

@@ -356,6 +357,212 @@ def SingleOp : OpenMP_Op<"single", traits = [
356357
let hasVerifier = 1;
357358
}
358359

360+
//===---------------------------------------------------------------------===//
361+
// OpenMP Canonical Loop Info Type
362+
//===---------------------------------------------------------------------===//
363+
364+
def CanonicalLoopInfoType : OpenMP_Type<"CanonicalLoopInfo", "cli"> {
365+
let summary = "Type for representing a reference to a canonical loop";
366+
let description = [{
367+
A variable of type CanonicalLoopInfo refers to an OpenMP-compatible
368+
canonical loop in the same function. Values of this type are not
369+
available at runtime and therefore cannot be used by the program itself,
370+
i.e. an opaque type. It is similar to the transform dialect's
371+
`!transform.interface` type, but instead of implementing an interface
372+
for each transformation, the OpenMP dialect itself defines possible
373+
operations on this type.
374+
375+
A value of type CanonicalLoopInfoType (in the following: CLI) can be
376+
377+
1. created by omp.new_cli.
378+
2. passed to omp.canonical_loop to associate the loop to that CLI. A CLI
379+
can only be associated once.
380+
3. passed to an omp loop transformation operation that modifies the loop
381+
associated with the CLI. The CLI is the "applyee" and the operation is
382+
the consumer. A CLI can only be consumed once.
383+
4. passed to an omp loop transformation operation to associate the cli with
384+
a result of that transformation. The CLI is the "generatee" and the
385+
operation is the generator.
386+
387+
A CLI cannot
388+
389+
1. be returned from a function.
390+
2. be passed to operations that are not specifically designed to take a
391+
CanonicalLoopInfoType, including AnyType.
392+
393+
A CLI directly corresponds to an object of
394+
OpenMPIRBuilder's CanonicalLoopInfo struct when lowering to LLVM-IR.
395+
}];
396+
}
397+
398+
//===---------------------------------------------------------------------===//
399+
// OpenMP Canonical Loop Info Creation
400+
//===---------------------------------------------------------------------===//
401+
402+
def NewCliOp : OpenMP_Op<"new_cli",
403+
[DeclareOpInterfaceMethods<OpAsmOpInterface, ["getAsmResultNames"]>]> {
404+
let summary = "Create a new Canonical Loop Info value.";
405+
let description = [{
406+
Create a new CLI that can be passed as an argument to a CanonicalLoopOp
407+
and to loop transformation operations to handle dependencies between
408+
loop transformation operations.
409+
}];
410+
411+
let arguments = (ins );
412+
let results = (outs CanonicalLoopInfoType:$result);
413+
let assemblyFormat = [{
414+
attr-dict
415+
}];
416+
417+
let builders = [
418+
OpBuilder<(ins )>,
419+
];
420+
421+
let hasVerifier = 1;
422+
}
423+
424+
//===---------------------------------------------------------------------===//
425+
// OpenMP Canonical Loop Operation
426+
//===---------------------------------------------------------------------===//
427+
def CanonicalLoopOp : OpenMPTransform_Op<"canonical_loop",
428+
[DeclareOpInterfaceMethods<OpAsmOpInterface, [ "getAsmBlockNames", "getAsmBlockArgumentNames"]>]> {
429+
let summary = "OpenMP Canonical Loop Operation";
430+
let description = [{
431+
All loops that conform to OpenMP's definition of a canonical loop can be
432+
simplified to a CanonicalLoopOp. In particular, there are no loop-carried
433+
variables and the number of iterations it will execute is known before the
434+
operation. This allows e.g. to determine the number of threads and chunks
435+
the iterations space is split into before executing any iteration. More
436+
restrictions may apply in cases such as (collapsed) loop nests, doacross
437+
loops, etc.
438+
439+
In contrast to other loop operations such as `scf.for`, the number of
440+
iterations is determined by only a single variable, the trip-count. The
441+
induction variable value is the logical iteration number of that iteration,
442+
which OpenMP defines to be between 0 and the trip-count (exclusive).
443+
Loop representations having lower-bound, upper-bound, and step-size operands
444+
require passes to do more work than necessary, including handling special
445+
cases such as upper-bound smaller than lower-bound, upper-bound equal to
446+
the integer type's maximal value, negative step size, etc. This complexity
447+
is better handled only once by the front-end, which can apply its semantics
448+
for such cases while still being able to represent any kind of loop, which
449+
is kind of the point of a mid-end intermediate representation. User-defined
450+
types such as random-access iterators in C++ could not directly be
451+
represented anyway.
452+
453+
The induction variable is always of the same type as the tripcount argument.
454+
Since it can never be negative, tripcount is always interpreted as an
455+
unsigned integer. It is the caller's responsibility to ensure the tripcount
456+
is not negative when its interpretation is signed, i.e.
457+
`%tripcount = max(0,%tripcount)`.
458+
459+
An optional argument to an omp.canonical_loop that can be passed in
460+
is a CanonicalLoopInfo value that can be used to refer to the canonical
461+
loop to apply transformations -- such as tiling, unrolling, or
462+
work-sharing -- to the loop, similar to the transform dialect but
463+
with OpenMP-specific semantics. Because it is optional, it has to be the
464+
last of the operands, but appears first in the pretty format printing.
465+
466+
The pretty assembly format is inspired by python syntax, where `range(n)`
467+
returns an iterator that runs from $0$ to $n-1$. The pretty assembly syntax
468+
is one of:
469+
470+
omp.canonical_loop(%cli) %iv : !type in range(%tripcount)
471+
omp.canonical_loop %iv : !type in range(%tripcount)
472+
473+
A CanonicalLoopOp is lowered to LLVM-IR using
474+
`OpenMPIRBuilder::createCanonicalLoop`.
475+
476+
#### Examples
477+
478+
Translation from lower-bound, upper-bound, step-size to trip-count.
479+
```c
480+
for (int i = 3; i < 42; i+=2) {
481+
B[i] = A[i];
482+
}
483+
```
484+
485+
```mlir
486+
%lb = arith.constant 3 : i32
487+
%ub = arith.constant 42 : i32
488+
%step = arith.constant 2 : i32
489+
%range = arith.sub %ub, %lb : i32
490+
%tripcount = arith.ceildivui %range, %step : i32
491+
omp.canonical_loop %iv : i32 in range(%tripcount) {
492+
%offset = arith.mul %iv, %step : i32
493+
%i = arith.add %offset, %lb : i32
494+
%a = load %arrA[%i] : memref<?xf32>
495+
store %a, %arrB[%i] : memref<?xf32>
496+
}
497+
```
498+
499+
Nested canonical loop with transformation of the inner loop.
500+
```mlir
501+
%outer = omp.new_cli
502+
%inner = omp.new_cli
503+
omp.canonical_loop(%outer) %iv1 : i32 in range(%tc1) {
504+
omp.canonical_loop(%inner) %iv2 : i32 in range(%tc2) {
505+
%a = load %arrA[%iv1, %iv2] : memref<?x?xf32>
506+
store %a, %arrB[%iv1, %iv2] : memref<?x?xf32>
507+
}
508+
}
509+
omp.unroll_full(%inner)
510+
```
511+
}];
512+
513+
514+
let arguments = (ins IntLikeType:$tripCount,
515+
Optional<CanonicalLoopInfoType>:$cli);
516+
let regions = (region AnyRegion:$region);
517+
518+
let extraClassDeclaration = [{
519+
::mlir::Value getInductionVar();
520+
}];
521+
522+
let builders = [
523+
OpBuilder<(ins "::mlir::Value":$tripCount)>,
524+
OpBuilder<(ins "::mlir::Value":$tripCount, "::mlir::Value":$cli)>,
525+
];
526+
527+
let hasCustomAssemblyFormat = 1;
528+
let hasVerifier = 1;
529+
}
530+
531+
//===----------------------------------------------------------------------===//
532+
// OpenMP unroll_heuristic operation
533+
//===----------------------------------------------------------------------===//
534+
535+
def UnrollHeuristicOp : OpenMPTransform_Op<"unroll_heuristic", []> {
536+
let summary = "OpenMP heuristic unroll operation";
537+
let description = [{
538+
Represents a `#pragma omp unroll` construct introduced in OpenMP 5.1.
539+
540+
The operation has one applyee and no generatees. The applyee is unrolled
541+
according to implementation-defined heuristics. Implementations may choose
542+
to not unroll the loop, partially unroll by a chosen factor, or fully
543+
unroll it. Even if the implementation chooses to partially unroll the
544+
applyee, the resulting unrolled loop is not accessible as a generatee. Use
545+
omp.unroll_partial if a generatee is required.
546+
547+
The lowering is implemented using `OpenMPIRBuilder::unrollLoopHeuristic`,
548+
which just attaches `llvm.loop.unroll.enable` metadata to the loop so the
549+
unrolling is carried out by LLVM's LoopUnroll pass. That is, unrolling is only
550+
actually performed in optimized builds.
551+
552+
Assembly formats:
553+
omp.unroll_heuristic(%cli)
554+
omp.unroll_heuristic(%cli) -> ()
555+
}];
556+
557+
let arguments = (ins CanonicalLoopInfoType:$applyee);
558+
559+
let builders = [
560+
OpBuilder<(ins "::mlir::Value":$cli)>,
561+
];
562+
563+
let hasCustomAssemblyFormat = 1;
564+
}
565+
359566
//===----------------------------------------------------------------------===//
360567
// 2.8.3 Workshare Construct
361568
//===----------------------------------------------------------------------===//

mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -551,4 +551,90 @@ def OffloadModuleInterface : OpInterface<"OffloadModuleInterface"> {
551551
];
552552
}
553553

554+
def LoopTransformationInterface : OpInterface<"LoopTransformationInterface"> {
555+
let description = [{
556+
Methods that are common for OpenMP loop transformation operations.
557+
}];
558+
559+
let cppNamespace = "::mlir::omp";
560+
561+
let methods = [
562+
563+
InterfaceMethod<
564+
/*description=*/[{
565+
Get the indices for the arguments that represent CanonicalLoopInfo
566+
applyees, i.e. loops that are transformed/consumed by this operation.
567+
}],
568+
/*returnType=*/ "std::pair<unsigned, unsigned>",
569+
/*methodName=*/ "getApplyeesODSOperandIndexAndLength",
570+
/*args=*/(ins)
571+
>,
572+
573+
InterfaceMethod<
574+
/*description=*/[{
575+
Get the indices for the arguments that represent CanonicalLoopInfo
576+
generatees, i.e. loops that are emitted by this operation.
577+
}],
578+
/*returnType=*/ "std::pair<unsigned, unsigned>",
579+
/*methodName=*/ "getGenerateesODSOperandIndexAndLength",
580+
/*args=*/(ins)
581+
>,
582+
583+
InterfaceMethod<
584+
/*description=*/[{
585+
Return the number of applyees of this loop transformation.
586+
}],
587+
/*returnType=*/ "unsigned",
588+
/*methodName=*/ "getNumApplyees",
589+
/*args=*/ (ins),
590+
/*methodBody=*/ "",
591+
/*defaultImpl=*/[{
592+
return $_op.getApplyeesODSOperandIndexAndLength().second;
593+
}]
594+
>,
595+
596+
InterfaceMethod<
597+
/*description=*/[{
598+
Return the number of generatees of this loop transformation.
599+
}],
600+
/*returnType=*/ "unsigned",
601+
/*methodName=*/ "getNumGeneratees",
602+
/*args=*/ (ins),
603+
/*methodBody=*/ "",
604+
/*defaultImpl=*/[{
605+
return $_op.getGenerateesODSOperandIndexAndLength().second;
606+
}]
607+
>,
608+
609+
InterfaceMethod<
610+
/*description=*/[{
611+
Return whether the operand with index `opnum` is an applyee of this operation, i.e. lies in the range reported by getApplyeesODSOperandIndexAndLength().
612+
}],
613+
/*returnType=*/ "bool",
614+
/*methodName=*/ "isApplyee",
615+
/*args=*/ (ins "unsigned":$opnum),
616+
/*methodBody=*/ "",
617+
/*defaultImpl=*/[{
618+
auto applyeeArgs = $_op.getApplyeesODSOperandIndexAndLength();
619+
return applyeeArgs.first <= opnum && opnum < applyeeArgs.first + applyeeArgs.second;
620+
}]
621+
>,
622+
623+
InterfaceMethod<
624+
/*description=*/[{
625+
Return whether the operand with index `opnum` is a generatee of this operation, i.e. lies in the range reported by getGenerateesODSOperandIndexAndLength().
626+
}],
627+
/*returnType=*/ "bool",
628+
/*methodName=*/ "isGeneratee",
629+
/*args=*/ (ins "unsigned":$opnum),
630+
/*methodBody=*/ "",
631+
/*defaultImpl=*/[{
632+
auto generateeArgs = $_op.getGenerateesODSOperandIndexAndLength();
633+
return generateeArgs.first <= opnum && opnum < generateeArgs.first + generateeArgs.second;
634+
}]
635+
>,
636+
637+
];
638+
}
639+
554640
#endif // OPENMP_OPS_INTERFACES

0 commit comments

Comments
 (0)