@@ -3518,7 +3518,7 @@ def fir_BoxTotalElementsOp
3518
3518
3519
3519
def YieldOp : fir_Op<"yield",
3520
3520
[Pure, ReturnLike, Terminator,
3521
- ParentOneOf<["LocalitySpecifierOp"]>]> {
3521
+ ParentOneOf<["LocalitySpecifierOp", "DeclareReductionOp" ]>]> {
3522
3522
let summary = "loop yield and termination operation";
3523
3523
let description = [{
3524
3524
"fir.yield" yields SSA values from a fir dialect op region and
@@ -3656,6 +3656,103 @@ def fir_LocalitySpecifierOp : fir_Op<"local", [IsolatedFromAbove]> {
3656
3656
let hasRegionVerifier = 1;
3657
3657
}
3658
3658
3659
+ def fir_DeclareReductionOp : fir_Op<"declare_reduction", [IsolatedFromAbove,
3660
+ Symbol]> {
3661
+ let summary = "declares a reduction kind";
3662
+ let description = [{
3663
+ Note: this operation is adapted from omp::DeclareReductionOp. There is a lot
3664
+ of duplication at the moment. TODO: Combine both ops into one. See:
3665
+ https://discourse.llvm.org/t/dialect-for-data-locality-sharing-specifiers-clauses-in-openmp-openacc-and-do-concurrent/86108.
3666
+
3667
+ Declares a `do concurrent` reduction. This requires two mandatory and three
3668
+ optional regions.
3669
+
3670
+ 1. The optional alloc region specifies how to allocate the thread-local
3671
+ reduction value. This region should not contain control flow and all
3672
+ IR should be suitable for inlining straight into an entry block. In
3673
+ the common case this is expected to contain only allocas. It is
3674
+ expected to `fir.yield` the allocated value on all control paths.
3675
+ If allocation is conditional (e.g. only allocate if the mold is
3676
+ allocated), this should be done in the initializer region and this
3677
+ region not included. The alloc region is not used for by-value
3678
+ reductions (where allocation is implicit).
3679
+ 2. The initializer region specifies how to initialize the thread-local
3680
+ reduction value. This is usually the neutral element of the reduction.
3681
+ For convenience, the region has an argument that contains the value
3682
+ of the reduction accumulator at the start of the reduction. If an alloc
3683
+ region is specified, there is a second block argument containing the
3684
+ address of the allocated memory. The initializer region is expected to
3685
+ `fir.yield` the new value on all control flow paths.
3686
+ 3. The reduction region specifies how to combine two values into one, i.e.
3687
+ the reduction operator. It accepts the two values as arguments and is
3688
+ expected to `fir.yield` the combined value on all control flow paths.
3689
+ 4. The atomic reduction region is optional and specifies how two values
3690
+ can be combined atomically given local accumulator variables. It is
3691
+ expected to store the combined value in the first accumulator variable.
3692
+ 5. The cleanup region is optional and specifies how to clean up any memory
3693
+ allocated by the initializer region. The region has an argument that
3694
+ contains the value of the thread-local reduction accumulator. This will
3695
+ be executed after the reduction has completed.
3696
+
3697
+ Note that the MLIR type system does not allow for type-polymorphic
3698
+ reductions. Separate reduction declarations should be created for different
3699
+ element and accumulator types.
3700
+
3701
+ For initializer and reduction regions, the operand to `fir.yield` must
3702
+ match the parent operation's results.
3703
+ }];
3704
+
3705
+ let arguments = (ins SymbolNameAttr:$sym_name,
3706
+ TypeAttr:$type);
3707
+
3708
+ let regions = (region MaxSizedRegion<1>:$allocRegion,
3709
+ AnyRegion:$initializerRegion,
3710
+ AnyRegion:$reductionRegion,
3711
+ AnyRegion:$atomicReductionRegion,
3712
+ AnyRegion:$cleanupRegion);
3713
+
3714
+ let assemblyFormat = "$sym_name `:` $type attr-dict-with-keyword "
3715
+ "( `alloc` $allocRegion^ )? "
3716
+ "`init` $initializerRegion "
3717
+ "`combiner` $reductionRegion "
3718
+ "( `atomic` $atomicReductionRegion^ )? "
3719
+ "( `cleanup` $cleanupRegion^ )? ";
3720
+
3721
+ let extraClassDeclaration = [{
3722
+ mlir::BlockArgument getAllocMoldArg() {
3723
+ auto &region = getAllocRegion();
3724
+ return region.empty() ? nullptr : region.getArgument(0);
3725
+ }
3726
+ mlir::BlockArgument getInitializerMoldArg() {
3727
+ return getInitializerRegion().getArgument(0);
3728
+ }
3729
+ mlir::BlockArgument getInitializerAllocArg() {
3730
+ return getAllocRegion().empty() ?
3731
+ nullptr : getInitializerRegion().getArgument(1);
3732
+ }
3733
+ mlir::BlockArgument getReductionLhsArg() {
3734
+ return getReductionRegion().getArgument(0);
3735
+ }
3736
+ mlir::BlockArgument getReductionRhsArg() {
3737
+ return getReductionRegion().getArgument(1);
3738
+ }
3739
+ mlir::BlockArgument getAtomicReductionLhsArg() {
3740
+ auto &region = getAtomicReductionRegion();
3741
+ return region.empty() ? nullptr : region.getArgument(0);
3742
+ }
3743
+ mlir::BlockArgument getAtomicReductionRhsArg() {
3744
+ auto &region = getAtomicReductionRegion();
3745
+ return region.empty() ? nullptr : region.getArgument(1);
3746
+ }
3747
+ mlir::BlockArgument getCleanupAllocArg() {
3748
+ auto &region = getCleanupRegion();
3749
+ return region.empty() ? nullptr : region.getArgument(0);
3750
+ }
3751
+ }];
3752
+
3753
+ let hasRegionVerifier = 1;
3754
+ }
3755
+
3659
3756
def fir_DoConcurrentOp : fir_Op<"do_concurrent",
3660
3757
[SingleBlock, AutomaticAllocationScope]> {
3661
3758
let summary = "do concurrent loop wrapper";
@@ -3694,6 +3791,25 @@ def fir_LocalSpecifier {
3694
3791
);
3695
3792
}
3696
3793
3794
+ def fir_ReduceSpecifier {
3795
+ dag arguments = (ins
3796
+ Variadic<AnyType>:$reduce_vars,
3797
+ OptionalAttr<DenseBoolArrayAttr>:$reduce_byref,
3798
+
3799
+ // This introduces redundancy in how reductions are modelled. In particular,
3800
+ // a single reduction is represented by 2 attributes:
3801
+ //
3802
+ // 1. `$reduce_syms` which is a list of `DeclareReductionOp`s.
3803
+ // 2. `$reduce_attrs` which is an array of `fir::ReduceAttr` values.
3804
+ //
3805
+ // The first makes it easier to map `do concurrent` to parallelization models
3806
+ // (e.g. OpenMP and OpenACC) while the second makes it easier to map it to
3807
+ // nests of `fir.do_loop ... unordered` ops.
3808
+ OptionalAttr<SymbolRefArrayAttr>:$reduce_syms,
3809
+ OptionalAttr<ArrayAttr>:$reduce_attrs
3810
+ );
3811
+ }
3812
+
3697
3813
def fir_DoConcurrentLoopOp : fir_Op<"do_concurrent.loop",
3698
3814
[AttrSizedOperandSegments, DeclareOpInterfaceMethods<LoopLikeOpInterface,
3699
3815
["getLoopInductionVars"]>,
@@ -3703,7 +3819,7 @@ def fir_DoConcurrentLoopOp : fir_Op<"do_concurrent.loop",
3703
3819
let description = [{
3704
3820
An operation that models a Fortran `do concurrent` loop's header and block.
3705
3821
This is a single-region single-block terminator op that is expected to
3706
- terminate the region of a `omp .do_concurrent` wrapper op.
3822
+ terminate the region of a `fir.do_concurrent` wrapper op.
3707
3823
3708
3824
This op borrows from both `scf.parallel` and `fir.do_loop` ops. Similar to
3709
3825
`scf.parallel`, a loop nest takes 3 groups of SSA values as operands that
@@ -3741,8 +3857,6 @@ def fir_DoConcurrentLoopOp : fir_Op<"do_concurrent.loop",
3741
3857
- `lowerBound`: The group of SSA values for the nest's lower bounds.
3742
3858
- `upperBound`: The group of SSA values for the nest's upper bounds.
3743
3859
- `step`: The group of SSA values for the nest's steps.
3744
- - `reduceOperands`: The reduction SSA values, if any.
3745
- - `reduceAttrs`: Attributes to store reduction operations, if any.
3746
3860
- `loopAnnotation`: Loop metadata to be passed down the compiler pipeline to
3747
3861
LLVM.
3748
3862
}];
@@ -3751,12 +3865,12 @@ def fir_DoConcurrentLoopOp : fir_Op<"do_concurrent.loop",
3751
3865
Variadic<Index>:$lowerBound,
3752
3866
Variadic<Index>:$upperBound,
3753
3867
Variadic<Index>:$step,
3754
- Variadic<AnyType>:$reduceOperands,
3755
- OptionalAttr<ArrayAttr>:$reduceAttrs,
3756
3868
OptionalAttr<LoopAnnotationAttr>:$loopAnnotation
3757
3869
);
3758
3870
3759
- let arguments = !con(opArgs, fir_LocalSpecifier.arguments);
3871
+ let arguments = !con(opArgs,
3872
+ fir_LocalSpecifier.arguments,
3873
+ fir_ReduceSpecifier.arguments);
3760
3874
3761
3875
let regions = (region SizedRegion<1>:$region);
3762
3876
@@ -3777,12 +3891,18 @@ def fir_DoConcurrentLoopOp : fir_Op<"do_concurrent.loop",
3777
3891
getNumLocalOperands());
3778
3892
}
3779
3893
3894
+ mlir::Block::BlockArgListType getRegionReduceArgs() {
3895
+ return getBody()->getArguments().slice(getNumInductionVars()
3896
+ + getNumLocalOperands(),
3897
+ getNumReduceOperands());
3898
+ }
3899
+
3780
3900
/// Number of operands controlling the loop
3781
3901
unsigned getNumControlOperands() { return getLowerBound().size() * 3; }
3782
3902
3783
3903
// Get Number of reduction operands
3784
3904
unsigned getNumReduceOperands() {
3785
- return getReduceOperands ().size();
3905
+ return getReduceVars ().size();
3786
3906
}
3787
3907
3788
3908
mlir::Operation::operand_range getLocalOperands() {
0 commit comments