-
Notifications
You must be signed in to change notification settings - Fork 359
Open
Description
This issue was observed when applying `--convert-krnl-to-affine` to the KrnlIR generated in PR #2756. In that PR, the following kind of KrnlIR is generated and lowered to affine using `--convert-krnl-to-affine`, but I got an error in some cases.
// Schematic of the generated KrnlIR: a single loop is marked parallel and
// iterated from 0 to 2; each scf.if below is guarded by a comparison of the
// induction value against one constant (0 or 1).
%loop0 = krnl.define_loops
krnl.parallel(%loop0) : !krnl.loop
krnl.iterate(%loop0) with (%loop0 -> %arg2 = 0 to 2){
// Read the induction value of %loop0 as an index.
%idx0 = krnl.get_induction_var_value(%loop0) : (!krnl.loop) -> (index)
// Branch taken when the induction value equals 0.
%0 = arith.cmpi eq, %idx0, 0 : index
scf.if %0 {
// KrnlIR for the ONNX operations (Thread 0)
}
// Branch taken when the compared value equals 1.
// NOTE(review): %i is not defined anywhere in this snippet; presumably
// %idx0 was intended -- confirm with the author.
%1 = arith.cmpi eq, %i, 1 : index
scf.if %1 {
// KrnlIR for the ONNX operations (Thread 1)
}
} // krnl.iterate
- Error message
onnx-mlir-opt: /home/imaihal/onnx-mlir/src/Conversion/KrnlToAffine/ConvertKrnlToAffine.cpp:238: void onnx_mlir::krnl::LoopBodyMover::moveOne(ml
ir::Value, llvm::SmallDenseMap<mlir::Value, mlir::Operation*, 4>&, bool): Assertion `insertPt != loopBody.end() && "Expecting insertPt in the l
oop"' failed.
-
Example to reproduce the error:
Command : onnx-mlir-opt --convert-krnl-to-affine issue-ng.mlir.txt
This file includes a single MatMulOp in a ForkOp, and the result of the MatMulOp is used in an AddOp.
-
Other observations:
No error: When removing the AddOp after ParallelOp, the error disappeared. (file: issue-ok.mlir.txt)
No error: When commenting out `scf.if`, the error disappeared (file: issue-ok2.mlir.txt)
Error: When commenting out `krnl.parallel`, the error still appeared.
// Reproducer for the LoopBodyMover::moveOne assertion raised by
// --convert-krnl-to-affine (presumably the content of issue-ng.mlir.txt --
// confirm). A MatMul nested under scf.if inside a parallel krnl.iterate writes
// %alloc, and a following vectorized Add reads %alloc and writes %alloc_0,
// which is returned.
func.func @main_graph(%arg0: memref<8x1024x768xf32>) -> memref<8x1024x768xf32> {
// Index constants used as loop bounds / subscripts, and the f32 zero used by
// the memset below.
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
%c768 = arith.constant 768 : index
%c1024 = arith.constant 1024 : index
%c8 = arith.constant 8 : index
// Constant operands, both filled with 1.0: %0 is 768x768 (MatMul operand),
// %1 is 768 (Add operand).
%0 = "krnl.global"() {name = "constant_0", shape = [768, 768], value = dense<1.000000e+00> : tensor<768x768xf32>} : () -> memref<768x768xf32>
%1 = "krnl.global"() {name = "constant_1", shape = [768], value = dense<1.000000e+00> : tensor<768xf32>} : () -> memref<768xf32>
// Buffer written by the MatMul section and read by the Add section.
%alloc = memref.alloc() {alignment = 16 : i64} : memref<8x1024x768xf32>
// Outer loop, marked parallel, with bounds 0 to 1.
%2 = krnl.define_loops 1
krnl.parallel(%2) : !krnl.loop
krnl.iterate(%2) with (%2 -> %arg1 = 0 to 1){
// The induction value is read through krnl.get_induction_var_value and
// compared against %c0 to guard the scf.if body.
%4 = krnl.get_induction_var_value(%2) : (!krnl.loop) -> index
%5 = arith.cmpi eq, %4, %c0 : index
scf.if %5 {
// MatMul
// Fill %alloc with %cst (0.0) before the matmul loops.
krnl.memset %alloc, %cst : memref<8x1024x768xf32>
// Loop over %c0 to %c8; its induction value %7 is the leading subscript
// of %arg0 and %alloc in krnl.matmul below.
%6 = krnl.define_loops 1
krnl.iterate(%6) with (%6 -> %arg2 = %c0 to %c8){
%7 = krnl.get_induction_var_value(%6) : (!krnl.loop) -> index
// Three loops, blocked by 4, 8 and 8 respectively, then permuted.
%8:3 = krnl.define_loops 3
%loop_block_1, %loop_local_2 = krnl.block %8#0 4 : (!krnl.loop) -> (!krnl.loop, !krnl.loop)
%loop_block_3, %loop_local_4 = krnl.block %8#1 8 : (!krnl.loop) -> (!krnl.loop, !krnl.loop)
%loop_block_5, %loop_local_6 = krnl.block %8#2 8 : (!krnl.loop) -> (!krnl.loop, !krnl.loop)
krnl.permute(%loop_block_1, %loop_local_2, %loop_block_3, %loop_local_4, %loop_block_5, %loop_local_6) [0, 3, 1, 4, 2, 5] : !krnl.loop, !krnl.loop, !krnl.loop, !krnl.l\
oop, !krnl.loop, !krnl.loop
// Only the block loops are iterated here; the local loops are passed to
// krnl.matmul together with the block induction values %9#0..%9#2.
krnl.iterate(%loop_block_1, %loop_block_3, %loop_block_5) with (%8#0 -> %arg3 = %c0 to %c1024, %8#1 -> %arg4 = %c0 to %c768, %8#2 -> %arg5 = %c0 to %c768){
%9:3 = krnl.get_induction_var_value(%loop_block_1, %loop_block_3, %loop_block_5) : (!krnl.loop, !krnl.loop, !krnl.loop) -> (index, index, index)
krnl.matmul %arg0[%7, %c0, %c0], %0[%c0, %c0], %alloc[%7, %c0, %c0], (%loop_local_2, %loop_local_4, %loop_local_6), (%9#0, %9#1, %9#2), (%c1024, %c768, %c768) {aTile\
Size = [], bTileSize = [], cTileSize = [], computeTileSize = [4, 8, 8]} : memref<8x1024x768xf32>, memref<768x768xf32>, memref<8x1024x768xf32>, (!krnl.loop, !krnl.loop, !krnl.loo\
p)
}
}
}
}
// return %alloc : memref<8x1024x768xf32>
// Add
// Per the report, removing this Add section makes the error disappear
// (issue-ok.mlir.txt).
%alloc_0 = memref.alloc() {alignment = 16 : i64} : memref<8x1024x768xf32>
// Three loops; only the innermost (%3#2) is blocked, by 32, matching the
// vector<32xf32> accesses in the body.
%3:3 = krnl.define_loops 3
%loop_block, %loop_local = krnl.block %3#2 32 : (!krnl.loop) -> (!krnl.loop, !krnl.loop)
krnl.iterate(%3#0, %3#1, %loop_block) with (%3#0 -> %arg1 = 0 to 8, %3#1 -> %arg2 = 0 to 1024, %3#2 -> %arg3 = 0 to 768){
%4:3 = krnl.get_induction_var_value(%3#0, %3#1, %loop_block) : (!krnl.loop, !krnl.loop, !krnl.loop) -> (index, index, index)
// Load 32 lanes from %alloc and from the 1-D constant %1 (subscripted by the
// innermost index only), add them, and store the sum into %alloc_0.
%5 = vector.load %alloc[%4#0, %4#1, %4#2] : memref<8x1024x768xf32>, vector<32xf32>
%6 = vector.load %1[%4#2] : memref<768xf32>, vector<32xf32>
%7 = arith.addf %5, %6 : vector<32xf32>
vector.store %7, %alloc_0[%4#0, %4#1, %4#2] : memref<8x1024x768xf32>, vector<32xf32>
}
return %alloc_0 : memref<8x1024x768xf32>
}
Metadata
Metadata
Assignees
Labels
No labels