Skip to content

An issue in ConvertKrnlToAffine Pass used in PR#2756 #2773

@imaihal

Description

@imaihal

This issue was observed when applying --convert-krnl-to-affine to the KrnlIR generated in PR #2756. In that PR, the following kind of KrnlIR is generated and then lowered to affine using --convert-krnl-to-affine, but the pass fails with an error in some cases.

// Schematic of the generated KrnlIR: a single parallel krnl loop whose
// induction variable selects which scf.if region ("thread") is executed.
%loop0 = krnl.define_loops 1
   krnl.parallel(%loop0) : !krnl.loop
   krnl.iterate(%loop0) with (%loop0 -> %arg2 = 0 to 2){
      %idx0 = krnl.get_induction_var_value(%loop0) : (!krnl.loop) -> (index)
      // Iteration 0 executes the first region.
      %0 = arith.cmpi eq, %idx0, 0 : index
      scf.if %0 {
         // KrnlIR  for the ONNX operations (Thread 0)
      }
      // Iteration 1 executes the second region; it compares the same
      // induction value (%idx0) as the first check.
      %1 = arith.cmpi eq, %idx0, 1 : index
      scf.if  %1 {
         // KrnlIR for the ONNX operations (Thread 1)
      }
 } // krnl.iterate
  • Error message
onnx-mlir-opt: /home/imaihal/onnx-mlir/src/Conversion/KrnlToAffine/ConvertKrnlToAffine.cpp:238: void onnx_mlir::krnl::LoopBodyMover::moveOne(mlir::Value, llvm::SmallDenseMap<mlir::Value, mlir::Operation*, 4>&, bool): Assertion `insertPt != loopBody.end() && "Expecting insertPt in the loop"' failed.
  • Example to reproduce the error:
    Command : onnx-mlir-opt --convert-krnl-to-affine issue-ng.mlir.txt
    This file contains a single MatMulOp inside a ForkOp, and the result of the MatMulOp is used by an AddOp.

  • Other observations:
    No error: When the AddOp after the ParallelOp is removed, the error disappears. (file: issue-ok.mlir.txt)
    No error: When scf.if is commented out, the error disappears. (file: issue-ok2.mlir.txt)
    Error: When krnl.parallel is commented out, the error still appears.

  // Reproducer: a MatMul expressed with krnl ops is nested inside an scf.if
  // within a parallel krnl.iterate; an element-wise Add after that loop reads
  // the MatMul output buffer (%alloc) and writes the returned buffer (%alloc_0).
  func.func @main_graph(%arg0: memref<8x1024x768xf32>) -> memref<8x1024x768xf32> {
    %c0 = arith.constant 0 : index
    %cst = arith.constant 0.000000e+00 : f32
    %c768 = arith.constant 768 : index
    %c1024 = arith.constant 1024 : index
    %c8 = arith.constant 8 : index
    // %0: 768x768 constant (all 1.0) used as the second operand of krnl.matmul.
    %0 = "krnl.global"() {name = "constant_0", shape = [768, 768], value = dense<1.000000e+00> : tensor<768x768xf32>} : () -> memref<768x768xf32>
    // %1: 768-element constant (all 1.0) loaded by the Add loop below.
    %1 = "krnl.global"() {name = "constant_1", shape = [768], value = dense<1.000000e+00> : tensor<768xf32>} : () -> memref<768xf32>
    // %alloc: buffer written by krnl.memset / krnl.matmul and later read by the Add.
    %alloc = memref.alloc() {alignment = 16 : i64} : memref<8x1024x768xf32>
    // Outer loop marked parallel; its bounds are 0 to 1.
    %2 = krnl.define_loops 1
    krnl.parallel(%2) : !krnl.loop
    krnl.iterate(%2) with (%2 -> %arg1 = 0 to 1){
      %4 = krnl.get_induction_var_value(%2) : (!krnl.loop) -> index
      // The MatMul region runs only when the induction value equals 0.
      %5 = arith.cmpi eq, %4, %c0 : index
      scf.if %5 {
        // MatMul
        // Fill the output buffer with %cst (0.0) before the matmul.
        krnl.memset %alloc, %cst : memref<8x1024x768xf32>
        // Loop over 0..%c8; %7 is used as the leading index into %arg0 and %alloc.
        %6 = krnl.define_loops 1
        krnl.iterate(%6) with (%6 -> %arg2 = %c0 to %c8){
          %7 = krnl.get_induction_var_value(%6) : (!krnl.loop) -> index
          // Three loops blocked by 4, 8 and 8 (same values as computeTileSize
          // on krnl.matmul), then permuted.
          %8:3 = krnl.define_loops 3
          %loop_block_1, %loop_local_2 = krnl.block %8#0 4 : (!krnl.loop) -> (!krnl.loop, !krnl.loop)
          %loop_block_3, %loop_local_4 = krnl.block %8#1 8 : (!krnl.loop) -> (!krnl.loop, !krnl.loop)
          %loop_block_5, %loop_local_6 = krnl.block %8#2 8 : (!krnl.loop) -> (!krnl.loop, !krnl.loop)
          krnl.permute(%loop_block_1, %loop_local_2, %loop_block_3, %loop_local_4, %loop_block_5, %loop_local_6) [0, 3, 1, 4, 2, 5] : !krnl.loop, !krnl.loop, !krnl.loop, !krnl.l\
oop, !krnl.loop, !krnl.loop
          // Only the block loops are iterated here; the local loops are passed
          // to krnl.matmul as operands.
          krnl.iterate(%loop_block_1, %loop_block_3, %loop_block_5) with (%8#0 -> %arg3 = %c0 to %c1024, %8#1 -> %arg4 = %c0 to %c768, %8#2 -> %arg5 = %c0 to %c768){
            %9:3 = krnl.get_induction_var_value(%loop_block_1, %loop_block_3, %loop_block_5) : (!krnl.loop, !krnl.loop, !krnl.loop) -> (index, index, index)
            krnl.matmul %arg0[%7, %c0, %c0], %0[%c0, %c0], %alloc[%7, %c0, %c0], (%loop_local_2, %loop_local_4, %loop_local_6), (%9#0, %9#1, %9#2), (%c1024, %c768, %c768) {aTile\
Size = [], bTileSize = [], cTileSize = [], computeTileSize = [4, 8, 8]} : memref<8x1024x768xf32>, memref<768x768xf32>, memref<8x1024x768xf32>, (!krnl.loop, !krnl.loop, !krnl.loo\
p)
          }
        }
      }
    }
//    return %alloc : memref<8x1024x768xf32>
    // Add
    // Innermost loop is blocked by 32, the same width as the vector<32xf32>
    // loads and stores in the body.
    %alloc_0 = memref.alloc() {alignment = 16 : i64} : memref<8x1024x768xf32>
    %3:3 = krnl.define_loops 3
    %loop_block, %loop_local = krnl.block %3#2 32 : (!krnl.loop) -> (!krnl.loop, !krnl.loop)
    krnl.iterate(%3#0, %3#1, %loop_block) with (%3#0 -> %arg1 = 0 to 8, %3#1 -> %arg2 = 0 to 1024, %3#2 -> %arg3 = 0 to 768){
      %4:3 = krnl.get_induction_var_value(%3#0, %3#1, %loop_block) : (!krnl.loop, !krnl.loop, !krnl.loop) -> (index, index, index)
      // Load 32 elements from the MatMul output and from the constant %1,
      // add them, and store the sum into %alloc_0.
      %5 = vector.load %alloc[%4#0, %4#1, %4#2] : memref<8x1024x768xf32>, vector<32xf32>
      %6 = vector.load %1[%4#2] : memref<768xf32>, vector<32xf32>
      %7 = arith.addf %5, %6 : vector<32xf32>
      vector.store %7, %alloc_0[%4#0, %4#1, %4#2] : memref<8x1024x768xf32>, vector<32xf32>
    }
    return %alloc_0 : memref<8x1024x768xf32>
  }

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions