|
| 1 | +; RUN: opt < %s -passes='print<loop-cache-cost>' -disable-output 2>&1 | FileCheck %s |
| 2 | + |
; Target configuration for this test. The triple selects powerpc64le on Linux;
; the leading "e" in the datalayout string denotes little-endian.
| 3 | +target datalayout = "e-m:e-i64:64-n32:64" |
| 4 | +target triple = "powerpc64le-unknown-linux-gnu" |
| 5 | + |
| 6 | +; Check that delinearization in loop cache analysis can handle fixed-size arrays. |
| 7 | +; The IR is copied from llvm/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheckFixedSize.ll |
| 8 | + |
| 9 | +; CHECK: Loop 'for.body' has cost = 4186116 |
| 10 | +; CHECK: Loop 'for.body4' has cost = 128898 |
| 11 | + |
| 12 | +;; #define N 1024 |
| 13 | +;; #define M 2048 |
| 14 | +;; void t1(int a[N][M]) { |
| 15 | +;; for (int i = 0; i < N-1; ++i) |
| 16 | +;; for (int j = 2; j < M; ++j) |
| 17 | +;; a[i][j] = a[i+1][j-2]; |
| 18 | +;; } |
| 19 | + |
; @t1: IR for the C function t1 shown in the comment above. A two-deep loop nest:
;   for.body  (outer): %indvars.iv4 starts at 0; the loop exits when its increment equals 1023.
;   for.body4 (inner): %indvars.iv starts at 2; the loop exits when its increment equals 2048.
; Each inner iteration loads a[%indvars.iv4 + 1][%indvars.iv - 2] and stores the
; value to a[%indvars.iv4][%indvars.iv].
| 20 | +define void @t1([2048 x i32]* %a) { |
| 21 | +entry: |
| 22 | + br label %for.body |
| 23 | + |
| 24 | +for.body: ; preds = %entry, %for.inc11 |
| 25 | + %indvars.iv4 = phi i64 [ 0, %entry ], [ %indvars.iv.next5, %for.inc11 ] |
| 26 | + br label %for.body4 |
| 27 | + |
| 28 | +for.body4: ; preds = %for.body, %for.body4 |
| 29 | + %indvars.iv = phi i64 [ 2, %for.body ], [ %indvars.iv.next, %for.body4 ] |
| 30 | + %0 = add nuw nsw i64 %indvars.iv4, 1 |
| 31 | + %1 = add nsw i64 %indvars.iv, -2 |
| 32 | + %arrayidx6 = getelementptr inbounds [2048 x i32], [2048 x i32]* %a, i64 %0, i64 %1 |
| 33 | + %2 = load i32, i32* %arrayidx6, align 4 |
; The store address is built on %a_gep (a single zero-index GEP of %a) rather
; than on %a directly.
; NOTE(review): presumably this checks that the analysis looks through the extra
; GEP to the same base array - confirm.
| 34 | + %a_gep = getelementptr inbounds [2048 x i32], [2048 x i32]* %a, i64 0 |
| 35 | + %arrayidx10 = getelementptr inbounds [2048 x i32], [2048 x i32]* %a_gep, i64 %indvars.iv4, i64 %indvars.iv |
| 36 | + store i32 %2, i32* %arrayidx10, align 4 |
| 37 | + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 |
| 38 | + %exitcond = icmp ne i64 %indvars.iv.next, 2048 |
| 39 | + br i1 %exitcond, label %for.body4, label %for.inc11 |
| 40 | + |
| 41 | +for.inc11: ; preds = %for.body4 |
| 42 | + %indvars.iv.next5 = add nuw nsw i64 %indvars.iv4, 1 |
| 43 | + %exitcond7 = icmp ne i64 %indvars.iv.next5, 1023 |
| 44 | + br i1 %exitcond7, label %for.body, label %for.end13 |
| 45 | + |
| 46 | +for.end13: ; preds = %for.inc11 |
| 47 | + ret void |
| 48 | +} |
| 49 | + |
| 50 | + |
| 51 | +; CHECK: Loop 'for.body' has cost = 4186116 |
| 52 | +; CHECK: Loop 'for.body4' has cost = 128898 |
| 53 | + |
; @t2: same two-deep loop nest and subscripts as @t1:
;   for.body  (outer): %indvars.iv4 starts at 0; the loop exits when its increment equals 1023.
;   for.body4 (inner): %indvars.iv starts at 2; the loop exits when its increment equals 2048.
; The load reads a[%indvars.iv4 + 1][%indvars.iv - 2]; the store writes
; [%indvars.iv4][%indvars.iv] relative to the pointer returned by a call (see below).
| 54 | +define void @t2([2048 x i32]* %a) { |
| 55 | +entry: |
| 56 | + br label %for.body |
| 57 | + |
| 58 | +for.body: ; preds = %entry, %for.inc11 |
| 59 | + %indvars.iv4 = phi i64 [ 0, %entry ], [ %indvars.iv.next5, %for.inc11 ] |
| 60 | + br label %for.body4 |
| 61 | + |
| 62 | +for.body4: ; preds = %for.body, %for.body4 |
| 63 | + %indvars.iv = phi i64 [ 2, %for.body ], [ %indvars.iv.next, %for.body4 ] |
| 64 | + %0 = add nuw nsw i64 %indvars.iv4, 1 |
| 65 | + %1 = add nsw i64 %indvars.iv, -2 |
| 66 | + %arrayidx6 = getelementptr inbounds [2048 x i32], [2048 x i32]* %a, i64 %0, i64 %1 |
| 67 | + %2 = load i32, i32* %arrayidx6, align 4 |
; The store base is %call, the result of @func_with_returned_arg(%a); the
; argument carries the `returned` attribute at this call site.
; NOTE(review): presumably this checks that the analysis treats %call as the
; same base array as %a - confirm.
| 68 | + %call = call [2048 x i32]* @func_with_returned_arg([2048 x i32]* returned %a) |
| 69 | + %arrayidx10 = getelementptr inbounds [2048 x i32], [2048 x i32]* %call, i64 %indvars.iv4, i64 %indvars.iv |
| 70 | + store i32 %2, i32* %arrayidx10, align 4 |
| 71 | + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 |
| 72 | + %exitcond = icmp ne i64 %indvars.iv.next, 2048 |
| 73 | + br i1 %exitcond, label %for.body4, label %for.inc11 |
| 74 | + |
| 75 | +for.inc11: ; preds = %for.body4 |
| 76 | + %indvars.iv.next5 = add nuw nsw i64 %indvars.iv4, 1 |
| 77 | + %exitcond7 = icmp ne i64 %indvars.iv.next5, 1023 |
| 78 | + br i1 %exitcond7, label %for.body, label %for.end13 |
| 79 | + |
| 80 | +for.end13: ; preds = %for.inc11 |
| 81 | + ret void |
| 82 | +} |
| 83 | + |
; External function called from @t2. Its single parameter is marked `returned`,
; the LLVM attribute stating that the function returns that argument.
| 84 | +declare [2048 x i32]* @func_with_returned_arg([2048 x i32]* returned %arg) |
| 85 | + |
| 86 | +; CHECK: Loop 'for.body' has cost = 4472886244958208 |
| 87 | +; CHECK: Loop 'for.body4' has cost = 4472886244958208 |
| 88 | +; CHECK: Loop 'for.body8' has cost = 4472886244958208 |
| 89 | +; CHECK: Loop 'for.body12' has cost = 4472886244958208 |
| 90 | +; CHECK: Loop 'for.body16' has cost = 137728168833024 |
| 91 | + |
| 92 | + |
| 93 | +;; #define N 1024 |
| 94 | +;; #define M 2048 |
| 95 | +;; void t3(int a[][N][N][N][M]) { |
| 96 | +;; for (int i1 = 0; i1 < N-1; ++i1) |
| 97 | +;; for (int i2 = 2; i2 < N; ++i2) |
| 98 | +;; for (int i3 = 0; i3 < N; ++i3) |
| 99 | +;; for (int i4 = 3; i4 < N; ++i4) |
| 100 | +;; for (int i5 = 0; i5 < M-2; ++i5) |
| 101 | +;; a[i1][i2][i3][i4][i5] = a[i1+1][i2-2][i3][i4-3][i5+2]; |
| 102 | +;; } |
| 103 | + |
; @t3: IR for the C function t3 shown in the comment above. A five-deep loop nest,
; listed outermost to innermost (each loop exits when its incremented induction
; variable equals the stated bound):
;   for.body   : %indvars.iv18 starts at 0, bound 1023
;   for.body4  : %indvars.iv14 starts at 2, bound 1024
;   for.body8  : %indvars.iv11 starts at 0, bound 1024
;   for.body12 : %indvars.iv7  starts at 3, bound 1024
;   for.body16 : %indvars.iv   starts at 0, bound 2046
; All memory accesses are in the innermost block, for.body16.
| 104 | +define void @t3([1024 x [1024 x [1024 x [2048 x i32]]]]* %a) { |
| 105 | +entry: |
| 106 | + br label %for.body |
| 107 | + |
| 108 | +for.body: ; preds = %entry, %for.inc46 |
| 109 | + %indvars.iv18 = phi i64 [ 0, %entry ], [ %indvars.iv.next19, %for.inc46 ] |
| 110 | + br label %for.body4 |
| 111 | + |
| 112 | +for.body4: ; preds = %for.body, %for.inc43 |
| 113 | + %indvars.iv14 = phi i64 [ 2, %for.body ], [ %indvars.iv.next15, %for.inc43 ] |
| 114 | + br label %for.body8 |
| 115 | + |
| 116 | +for.body8: ; preds = %for.body4, %for.inc40 |
| 117 | + %indvars.iv11 = phi i64 [ 0, %for.body4 ], [ %indvars.iv.next12, %for.inc40 ] |
| 118 | + br label %for.body12 |
| 119 | + |
| 120 | +for.body12: ; preds = %for.body8, %for.inc37 |
| 121 | + %indvars.iv7 = phi i64 [ 3, %for.body8 ], [ %indvars.iv.next8, %for.inc37 ] |
| 122 | + br label %for.body16 |
| 123 | + |
| 124 | +for.body16: ; preds = %for.body12, %for.body16 |
| 125 | + %indvars.iv = phi i64 [ 0, %for.body12 ], [ %indvars.iv.next, %for.body16 ] |
; Offset subscripts for the load: i1 + 1, i2 - 2, i4 - 3, i5 + 2 (i3 is used unchanged).
| 126 | + %0 = add nuw nsw i64 %indvars.iv18, 1 |
| 127 | + %1 = add nsw i64 %indvars.iv14, -2 |
| 128 | + %2 = add nsw i64 %indvars.iv7, -3 |
| 129 | + %3 = add nuw nsw i64 %indvars.iv, 2 |
; Load a[i1+1][i2-2][i3][i4-3][i5+2] through a single five-index GEP on %a.
| 130 | + %arrayidx26 = getelementptr inbounds [1024 x [1024 x [1024 x [2048 x i32]]]], [1024 x [1024 x [1024 x [2048 x i32]]]]* %a, i64 %0, i64 %1, i64 %indvars.iv11, i64 %2, i64 %3 |
| 131 | + %4 = load i32, i32* %arrayidx26, align 4 |
; Store the loaded value to a[i1][i2][i3][i4][i5], indexed by the raw induction variables.
| 132 | + %arrayidx36 = getelementptr inbounds [1024 x [1024 x [1024 x [2048 x i32]]]], [1024 x [1024 x [1024 x [2048 x i32]]]]* %a, i64 %indvars.iv18, i64 %indvars.iv14, i64 %indvars.iv11, i64 %indvars.iv7, i64 %indvars.iv |
| 133 | + store i32 %4, i32* %arrayidx36, align 4 |
| 134 | + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 |
| 135 | + %exitcond = icmp ne i64 %indvars.iv.next, 2046 |
| 136 | + br i1 %exitcond, label %for.body16, label %for.inc37 |
| 137 | + |
| 138 | +for.inc37: ; preds = %for.body16 |
| 139 | + %indvars.iv.next8 = add nuw nsw i64 %indvars.iv7, 1 |
| 140 | + %exitcond10 = icmp ne i64 %indvars.iv.next8, 1024 |
| 141 | + br i1 %exitcond10, label %for.body12, label %for.inc40 |
| 142 | + |
| 143 | +for.inc40: ; preds = %for.inc37 |
| 144 | + %indvars.iv.next12 = add nuw nsw i64 %indvars.iv11, 1 |
| 145 | + %exitcond13 = icmp ne i64 %indvars.iv.next12, 1024 |
| 146 | + br i1 %exitcond13, label %for.body8, label %for.inc43 |
| 147 | + |
| 148 | +for.inc43: ; preds = %for.inc40 |
| 149 | + %indvars.iv.next15 = add nuw nsw i64 %indvars.iv14, 1 |
| 150 | + %exitcond17 = icmp ne i64 %indvars.iv.next15, 1024 |
| 151 | + br i1 %exitcond17, label %for.body4, label %for.inc46 |
| 152 | + |
| 153 | +for.inc46: ; preds = %for.inc43 |
| 154 | + %indvars.iv.next19 = add nuw nsw i64 %indvars.iv18, 1 |
| 155 | + %exitcond21 = icmp ne i64 %indvars.iv.next19, 1023 |
| 156 | + br i1 %exitcond21, label %for.body, label %for.end48 |
| 157 | + |
| 158 | +for.end48: ; preds = %for.inc46 |
| 159 | + ret void |
| 160 | +} |
0 commit comments