An issue in ConvertKrnlToAffine Pass used in PR#2756

This issue was observed when applying `--convert-krnl-to-affine` to the KrnlIR generated in PR https://github.com/onnx/onnx-mlir/pull/2756. That PR generates KrnlIR of the following form and lowers it to affine with `--convert-krnl-to-affine`, but the lowering fails with an error in some cases.
```
%loop0 = krnl.define_loops
   krnl.parallel(%loop0) : !krnl.loop
   krnl.iterate(%loop0) with (%loop0 -> %arg2 = 0 to 2){
      %idx0 = krnl.get_induction_var_value(%loop0) : (!krnl.loop) -> (index)
      %0 = arith.cmpi eq, %idx0, 0 : index
      scf.if %0 {
         // KrnlIR  for the ONNX operations (Thread 0)
      }
      %1 = arith.cmpi eq, %idx0, 1 : index
      scf.if  %1 {
         // KrnlIR for the ONNX operations (Thread 1)
      }
 } // krnl.iterate
```
- Error message
```
onnx-mlir-opt: /home/imaihal/onnx-mlir/src/Conversion/KrnlToAffine/ConvertKrnlToAffine.cpp:238: void onnx_mlir::krnl::LoopBodyMover::moveOne(mlir::Value, llvm::SmallDenseMap<mlir::Value, mlir::Operation*, 4>&, bool): Assertion `insertPt != loopBody.end() && "Expecting insertPt in the loop"' failed.
```

- Example to reproduce the error:
Command: onnx-mlir-opt --convert-krnl-to-affine [issue-ng.mlir.txt](https://github.com/onnx/onnx-mlir/files/14735795/issue-ng.mlir.txt)
  This file contains a single MatMulOp inside a ForkOp, and the result of the MatMulOp is used by an AddOp.

- Other observations:
  No error: When the AddOp after the ParallelOp is removed, the error disappears. (file: [issue-ok.mlir.txt](https://github.com/onnx/onnx-mlir/files/14735804/issue-ok.mlir.txt))
  No error: When `scf.if` is commented out, the error disappears. (file: [issue-ok2.mlir.txt](https://github.com/onnx/onnx-mlir/files/14735821/issue-ok2.mlir.txt))
  Error: When `krnl.parallel` is commented out, the error still appears.


```
  // Reproducer for the `insertPt != loopBody.end()` assertion raised by
  // `onnx-mlir-opt --convert-krnl-to-affine`: a MatMul nested inside a
  // krnl.parallel / krnl.iterate loop and an scf.if, whose output buffer
  // (%alloc) is then read by an Add loop nest placed after the parallel loop.
  func.func @main_graph(%arg0: memref<8x1024x768xf32>) -> memref<8x1024x768xf32> {
    %c0 = arith.constant 0 : index
    %cst = arith.constant 0.000000e+00 : f32
    %c768 = arith.constant 768 : index
    %c1024 = arith.constant 1024 : index
    %c8 = arith.constant 8 : index
    // %0 is the 768x768 MatMul operand; %1 is the 768-element Add operand.
    %0 = "krnl.global"() {name = "constant_0", shape = [768, 768], value = dense<1.000000e+00> : tensor<768x768xf32>} : () -> memref<768x768xf32>
    %1 = "krnl.global"() {name = "constant_1", shape = [768], value = dense<1.000000e+00> : tensor<768xf32>} : () -> memref<768xf32>
    // Output buffer of the MatMul; also the input of the Add further down.
    %alloc = memref.alloc() {alignment = 16 : i64} : memref<8x1024x768xf32>
    // Outer loop marked parallel, iterating over 0 to 1; the scf.if below
    // guards its body on the induction value being equal to %c0.
    %2 = krnl.define_loops 1
    krnl.parallel(%2) : !krnl.loop
    krnl.iterate(%2) with (%2 -> %arg1 = 0 to 1){
      %4 = krnl.get_induction_var_value(%2) : (!krnl.loop) -> index
      %5 = arith.cmpi eq, %4, %c0 : index
      scf.if %5 {
        // MatMul
        // Fill the MatMul output buffer %alloc with %cst (0.0).
        krnl.memset %alloc, %cst : memref<8x1024x768xf32>
        // Loop from %c0 to %c8; %7 is used as the leading index of both
        // %arg0 and %alloc in the krnl.matmul below.
        %6 = krnl.define_loops 1
        krnl.iterate(%6) with (%6 -> %arg2 = %c0 to %c8){
          %7 = krnl.get_induction_var_value(%6) : (!krnl.loop) -> index
          // Three loops blocked by 4, 8 and 8, matching
          // computeTileSize = [4, 8, 8] on the krnl.matmul below.
          %8:3 = krnl.define_loops 3
          %loop_block_1, %loop_local_2 = krnl.block %8#0 4 : (!krnl.loop) -> (!krnl.loop, !krnl.loop)
          %loop_block_3, %loop_local_4 = krnl.block %8#1 8 : (!krnl.loop) -> (!krnl.loop, !krnl.loop)
          %loop_block_5, %loop_local_6 = krnl.block %8#2 8 : (!krnl.loop) -> (!krnl.loop, !krnl.loop)
          krnl.permute(%loop_block_1, %loop_local_2, %loop_block_3, %loop_local_4, %loop_block_5, %loop_local_6) [0, 3, 1, 4, 2, 5] : !krnl.loop, !krnl.loop, !krnl.loop, !krnl.l\
oop, !krnl.loop, !krnl.loop
          // Only the block loops are iterated here; the local loops are handed
          // to krnl.matmul as operands.
          krnl.iterate(%loop_block_1, %loop_block_3, %loop_block_5) with (%8#0 -> %arg3 = %c0 to %c1024, %8#1 -> %arg4 = %c0 to %c768, %8#2 -> %arg5 = %c0 to %c768){
            %9:3 = krnl.get_induction_var_value(%loop_block_1, %loop_block_3, %loop_block_5) : (!krnl.loop, !krnl.loop, !krnl.loop) -> (index, index, index)
            krnl.matmul %arg0[%7, %c0, %c0], %0[%c0, %c0], %alloc[%7, %c0, %c0], (%loop_local_2, %loop_local_4, %loop_local_6), (%9#0, %9#1, %9#2), (%c1024, %c768, %c768) {aTile\
Size = [], bTileSize = [], cTileSize = [], computeTileSize = [4, 8, 8]} : memref<8x1024x768xf32>, memref<768x768xf32>, memref<8x1024x768xf32>, (!krnl.loop, !krnl.loop, !krnl.loo\
p)
          }
        }
      }
    }
    // NOTE(review): the commented-out return below presumably corresponds to
    // the reported "no error" variant in which the Add is removed -- confirm
    // against issue-ok.mlir.txt.
//    return %alloc : memref<8x1024x768xf32>
    // Add
    // Reads 32-wide vectors from %alloc (the MatMul output) and %1, adds
    // them, and stores the sum to %alloc_0. The innermost loop is blocked by
    // 32, matching the vector<32xf32> accesses.
    %alloc_0 = memref.alloc() {alignment = 16 : i64} : memref<8x1024x768xf32>
    %3:3 = krnl.define_loops 3
    %loop_block, %loop_local = krnl.block %3#2 32 : (!krnl.loop) -> (!krnl.loop, !krnl.loop)
    krnl.iterate(%3#0, %3#1, %loop_block) with (%3#0 -> %arg1 = 0 to 8, %3#1 -> %arg2 = 0 to 1024, %3#2 -> %arg3 = 0 to 768){
      %4:3 = krnl.get_induction_var_value(%3#0, %3#1, %loop_block) : (!krnl.loop, !krnl.loop, !krnl.loop) -> (index, index, index)
      %5 = vector.load %alloc[%4#0, %4#1, %4#2] : memref<8x1024x768xf32>, vector<32xf32>
      %6 = vector.load %1[%4#2] : memref<768xf32>, vector<32xf32>
      %7 = arith.addf %5, %6 : vector<32xf32>
      vector.store %7, %alloc_0[%4#0, %4#1, %4#2] : memref<8x1024x768xf32>, vector<32xf32>
    }
    return %alloc_0 : memref<8x1024x768xf32>
  }
```



Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

An issue in ConvertKrnlToAffine Pass used in PR#2756 #2773

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

An issue in ConvertKrnlToAffine Pass used in PR#2756 #2773

Description

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions