@@ -22,6 +22,7 @@ include "mlir/Dialect/OpenMP/OpenMPOpBase.td"
22
22
include "mlir/Interfaces/ControlFlowInterfaces.td"
23
23
include "mlir/Interfaces/SideEffectInterfaces.td"
24
24
include "mlir/IR/EnumAttr.td"
25
+ include "mlir/IR/OpAsmInterface.td"
25
26
include "mlir/IR/OpBase.td"
26
27
include "mlir/IR/SymbolInterfaces.td"
27
28
@@ -356,6 +357,212 @@ def SingleOp : OpenMP_Op<"single", traits = [
356
357
let hasVerifier = 1;
357
358
}
358
359
360
+ //===---------------------------------------------------------------------===//
361
+ // OpenMP Canonical Loop Info Type
362
+ //===---------------------------------------------------------------------===//
363
+
364
+ def CanonicalLoopInfoType : OpenMP_Type<"CanonicalLoopInfo", "cli"> {
365
+ let summary = "Type for representing a reference to a canonical loop";
366
+ let description = [{
367
+ A variable of type CanonicalLoopInfo refers to an OpenMP-compatible
368
+ canonical loop in the same function. Values of this type are not
369
+ available at runtime and therefore cannot be used by the program itself,
370
+ i.e. it is an opaque type. It is similar to the transform dialect's
371
+ `!transform.interface` type, but instead of implementing an interface
372
+ for each transformation, the OpenMP dialect itself defines possible
373
+ operations on this type.
374
+
375
+ A value of type CanonicalLoopInfoType (in the following: CLI) can be
376
+
377
+ 1. created by omp.new_cli.
378
+ 2. passed to omp.canonical_loop to associate the loop to that CLI. A CLI
379
+ can only be associated once.
380
+ 3. passed to an omp loop transformation operation that modifies the loop
381
+ associated with the CLI. The CLI is the "applyee" and the operation is
382
+ the consumer. A CLI can only be consumed once.
383
+ 4. passed to an omp loop transformation operation to associate the CLI with
384
+ a result of that transformation. The CLI is the "generatee" and the
385
+ operation is the generator.
386
+
387
+ A CLI cannot
388
+
389
+ 1. be returned from a function.
390
+ 2. be passed to operations that are not specifically designed to take a
391
+ CanonicalLoopInfoType, including AnyType.
392
+
393
+ A CLI directly corresponds to an object of
394
+ OpenMPIRBuilder's CanonicalLoopInfo struct when lowering to LLVM-IR.
395
+ }];
396
+ }
397
+
398
+ //===---------------------------------------------------------------------===//
399
+ // OpenMP Canonical Loop Info Creation
400
+ //===---------------------------------------------------------------------===//
401
+
402
+ def NewCliOp : OpenMP_Op<"new_cli",
403
+ [DeclareOpInterfaceMethods<OpAsmOpInterface, ["getAsmResultNames"]>]> {
404
+ let summary = "Create a new Canonical Loop Info value.";
405
+ let description = [{
406
+ Create a new CLI that can be passed as an argument to a CanonicalLoopOp
407
+ and to loop transformation operations to handle dependencies between
408
+ loop transformation operations.
409
+ }];
410
+
411
+ let arguments = (ins );
412
+ let results = (outs CanonicalLoopInfoType:$result);
413
+ let assemblyFormat = [{
414
+ attr-dict
415
+ }];
416
+
417
+ let builders = [
418
+ OpBuilder<(ins )>,
419
+ ];
420
+
421
+ let hasVerifier = 1;
422
+ }
423
+
424
+ //===---------------------------------------------------------------------===//
425
+ // OpenMP Canonical Loop Operation
426
+ //===---------------------------------------------------------------------===//
427
+ def CanonicalLoopOp : OpenMPTransform_Op<"canonical_loop",
428
+ [DeclareOpInterfaceMethods<OpAsmOpInterface, [ "getAsmBlockNames", "getAsmBlockArgumentNames"]>]> {
429
+ let summary = "OpenMP Canonical Loop Operation";
430
+ let description = [{
431
+ All loops that conform to OpenMP's definition of a canonical loop can be
432
+ simplified to a CanonicalLoopOp. In particular, there are no loop-carried
433
+ variables and the number of iterations it will execute is known before the
434
+ operation. This allows, e.g., determining the number of threads and chunks
434
+ the iteration space is split into before executing any iteration. More
436
+ restrictions may apply in cases such as (collapsed) loop nests, doacross
437
+ loops, etc.
438
+
439
+ In contrast to other loop operations such as `scf.for`, the number of
440
+ iterations is determined by only a single variable, the trip-count. The
441
+ induction variable value is the logical iteration number of that iteration,
442
+ which OpenMP defines to be between 0 and the trip-count (exclusive).
443
+ Loop representations having lower-bound, upper-bound, and step-size operands
443
+ require passes to do more work than necessary, including handling special
444
+ cases such as upper-bound smaller than lower-bound, upper-bound equal to
445
+ the integer type's maximal value, negative step size, etc. This complexity
446
+ is better handled only once by the front-end, which can apply its semantics
447
+ for such cases while still being able to represent any kind of loop, which
448
+ is kind of the point of a mid-end intermediate representation. User-defined
450
+ types such as random-access iterators in C++ could not directly be
451
+ represented anyway.
452
+
453
+ The induction variable is always of the same type as the tripcount argument.
454
+ Since it can never be negative, tripcount is always interpreted as an
455
+ unsigned integer. It is the caller's responsibility to ensure the tripcount
456
+ is not negative when its interpretation is signed, i.e.
457
+ `%tripcount = max(0,%tripcount)`.
458
+
459
+ An optional argument to an omp.canonical_loop that can be passed in
460
+ is a CanonicalLoopInfo value that can be used to refer to the canonical
461
+ loop to apply transformations -- such as tiling, unrolling, or
462
+ work-sharing -- to the loop, similar to the transform dialect but
463
+ with OpenMP-specific semantics. Because it is optional, it has to be the
464
+ last of the operands, but appears first in the pretty format printing.
465
+
466
+ The pretty assembly format is inspired by Python syntax, where `range(n)`
467
+ returns an iterator that runs from $0$ to $n-1$. The pretty assembly syntax
468
+ is one of:
469
+
470
+ omp.canonical_loop(%cli) %iv : !type in range(%tripcount)
471
+ omp.canonical_loop %iv : !type in range(%tripcount)
472
+
473
+ A CanonicalLoopOp is lowered to LLVM-IR using
474
+ `OpenMPIRBuilder::createCanonicalLoop`.
475
+
476
+ #### Examples
477
+
478
+ Translation from lower-bound, upper-bound, step-size to trip-count.
479
+ ```c
480
+ for (int i = 3; i < 42; i+=2) {
481
+ B[i] = A[i];
482
+ }
483
+ ```
484
+
485
+ ```mlir
486
+ %lb = arith.constant 3 : i32
487
+ %ub = arith.constant 42 : i32
488
+ %step = arith.constant 2 : i32
489
+ %range = arith.subi %ub, %lb : i32
490
+ %tripcount = arith.ceildivui %range, %step : i32
491
+ omp.canonical_loop %iv : i32 in range(%tripcount) {
492
+ %offset = arith.muli %iv, %step : i32
493
+ %i = arith.addi %offset, %lb : i32
494
+ %a = load %arrA[%i] : memref<?xf32>
495
+ store %a, %arrB[%i] : memref<?xf32>
496
+ }
497
+ ```
498
+
499
+ Nested canonical loop with transformation of the inner loop.
500
+ ```mlir
501
+ %outer = omp.new_cli
502
+ %inner = omp.new_cli
503
+ omp.canonical_loop(%outer) %iv1 : i32 in range(%tc1) {
504
+ omp.canonical_loop(%inner) %iv2 : i32 in range(%tc2) {
505
+ %a = load %arrA[%iv1, %iv2] : memref<?x?xf32>
506
+ store %a, %arrB[%iv1, %iv2] : memref<?x?xf32>
507
+ }
508
+ }
509
+ omp.unroll_full(%inner)
510
+ ```
511
+ }];
512
+
513
+
514
+ let arguments = (ins IntLikeType:$tripCount,
515
+ Optional<CanonicalLoopInfoType>:$cli);
516
+ let regions = (region AnyRegion:$region);
517
+
518
+ let extraClassDeclaration = [{
519
+ ::mlir::Value getInductionVar();
520
+ }];
521
+
522
+ let builders = [
523
+ OpBuilder<(ins "::mlir::Value":$tripCount)>,
524
+ OpBuilder<(ins "::mlir::Value":$tripCount, "::mlir::Value":$cli)>,
525
+ ];
526
+
527
+ let hasCustomAssemblyFormat = 1;
528
+ let hasVerifier = 1;
529
+ }
530
+
531
+ //===----------------------------------------------------------------------===//
532
+ // OpenMP unroll_heuristic operation
533
+ //===----------------------------------------------------------------------===//
534
+
535
+ def UnrollHeuristicOp : OpenMPTransform_Op<"unroll_heuristic", []> {
536
+ let summary = "OpenMP heuristic unroll operation";
537
+ let description = [{
538
+ Represents a `#pragma omp unroll` construct introduced in OpenMP 5.1.
539
+
540
+ The operation has one applyee and no generatees. The applyee is unrolled
541
+ according to implementation-defined heuristics. Implementations may choose
542
+ to not unroll the loop, partially unroll by a chosen factor, or fully
543
+ unroll it. Even if the implementation chooses to partially unroll the
544
+ applyee, the resulting unrolled loop is not accessible as a generatee. Use
545
+ omp.unroll_partial if a generatee is required.
546
+
547
+ The lowering is implemented using `OpenMPIRBuilder::unrollLoopHeuristic`,
548
+ which just attaches `llvm.loop.unroll.enable` metadata to the loop so the
549
+ unrolling is carried out by LLVM's LoopUnroll pass. That is, unrolling is only
550
+ actually performed in optimized builds.
551
+
552
+ Assembly formats:
553
+ omp.unroll_heuristic(%cli)
554
+ omp.unroll_heuristic(%cli) -> ()
555
+ }];
556
+
557
+ let arguments = (ins CanonicalLoopInfoType:$applyee);
558
+
559
+ let builders = [
560
+ OpBuilder<(ins "::mlir::Value":$cli)>,
561
+ ];
562
+
563
+ let hasCustomAssemblyFormat = 1;
564
+ }
565
+
359
566
//===----------------------------------------------------------------------===//
360
567
// 2.8.3 Workshare Construct
361
568
//===----------------------------------------------------------------------===//
0 commit comments