@@ -244,12 +244,12 @@ Value LoopEmitter::genAddress(OpBuilder &builder, Location loc, TensorId tid,
 Value LoopEmitter::genSegmentHigh(OpBuilder &builder, Location loc,
                                   TensorId tid, Level lvl, Value pLo,
                                   Value pHi) {
-  SparseTensorLevel &level = *lvls[tid][lvl];
-  const Value sameCrd = level.peekCrdAt(builder, loc, pLo);
+  SparseTensorLevel &stl = *lvls[tid][lvl];
+  const Value sameCrd = stl.peekCrdAt(builder, loc, pLo);
   auto whileOp = builder.create<scf::WhileOp>(
       loc, builder.getIndexType(), pLo,
       /*beforeBuilder=*/
-      [pHi, &level, sameCrd](OpBuilder &builder, Location loc, ValueRange ivs) {
+      [pHi, &stl, sameCrd](OpBuilder &builder, Location loc, ValueRange ivs) {
         const auto pos = ivs[0];
         Value inBound = builder.create<arith::CmpIOp>(
             loc, arith::CmpIPredicate::ult, pos, pHi);
@@ -260,7 +260,7 @@ Value LoopEmitter::genSegmentHigh(OpBuilder &builder, Location loc,
           // Load the next coordinates only when inbound (to avoid OOB
           // accesses).
           builder.setInsertionPointToStart(ifInBound.thenBlock());
-          Value crd = level.peekCrdAt(builder, loc, pos);
+          Value crd = stl.peekCrdAt(builder, loc, pos);
           Value isSameCrd = builder.create<arith::CmpIOp>(
               loc, arith::CmpIPredicate::eq, crd, sameCrd);
           YIELD(isSameCrd);
@@ -1226,27 +1226,19 @@ void LoopEmitter::prepareLoopOverTensorAtLvl(OpBuilder &builder, Location loc,
 
   const Value c0 = C_IDX(0);
   const Value c1 = C_IDX(1);
-  const Value c2 = C_IDX(2);
   // Either the first level, or the previous level has been set.
   /// FIXME: See the [CLARIFY_POSITS_LVL] note in the header.
   assert(lvl == 0 || posits[tid][lvl - 1]);
-  if (isCompressedLT(lvlTp) || isLooseCompressedLT(lvlTp)) {
-    // TODO: eliminate the cast upon feature complete.
-    const Value mem =
-        isCompressedLT(lvlTp)
-            ? static_cast<CompressedLevel &>(*lvls[tid][lvl]).posBuffer
-            : static_cast<LooseCompressedLevel &>(*lvls[tid][lvl]).posBuffer;
-
-    Value pLo = lvl == 0 ? c0 : posits[tid][lvl - 1];
-    if (isLooseCompressedLT(lvlTp))
-      pLo = builder.create<arith::MulIOp>(loc, pLo, c2);
-    posits[tid][lvl] = genIndexLoad(builder, loc, mem, pLo);
-
-    const Value pHi = ADDI(pLo, c1);
-    highs[tid][lvl] = genIndexLoad(builder, loc, mem, pHi);
+  if (isCompressedLT(lvlTp) || isLooseCompressedLT(lvlTp) ||
+      is2OutOf4LT(lvlTp)) {
+
+    Value pos = lvl == 0 ? c0 : posits[tid][lvl - 1];
+    std::tie(posits[tid][lvl], highs[tid][lvl]) =
+        lvls[tid][lvl]->peekRangeAt(builder, loc, pos);
     return;
   }
   if (isSingletonLT(lvlTp)) {
+    // TODO: merge this as well when SparseTensorLevel supports dedup.
     const Value pLo = lvl == 0 ? c0 : posits[tid][lvl - 1];
     posits[tid][lvl] = pLo;
 
@@ -1262,13 +1254,6 @@ void LoopEmitter::prepareLoopOverTensorAtLvl(OpBuilder &builder, Location loc,
                           : ADDI(pLo, c1);
     return;
   }
-  if (is2OutOf4LT(lvlTp)) {
-    const Value pLo = lvl == 0 ? c0 : posits[tid][lvl - 1];
-    // Each 2:4 block has exactly two specified elements.
-    posits[tid][lvl] = MULI(pLo, c2);
-    highs[tid][lvl] = ADDI(posits[tid][lvl], c2);
-    return;
-  }
   llvm_unreachable("Unrecognized level-type!");
 }
 
@@ -1824,18 +1809,11 @@ void LoopEmitter::genResolvedSliceBegin(OpBuilder &builder, Location loc,
   auto [nxSz, stride] = sliceMeta[tid][lvl][1];
   assert(stride == 1 && "Not yet implemented");
   Value sPtrBuf = slicePosBuffer[tid][lvl][0];
-  Value pHi, pLo;
-  if (lvl == 0) {
-    pLo = c0;
-    // TODO: eliminate the cast upon feature complete.
-    Value pBuf = static_cast<CompressedLevel &>(*lvls[tid][0]).posBuffer;
-    pHi = genIndexLoad(builder, loc, pBuf, c1);
-  } else {
-    // TODO: eliminate the cast upon feature complete.
-    Value pBuf = static_cast<CompressedLevel &>(*lvls[tid][lvl]).posBuffer;
-    pLo = genIndexLoad(builder, loc, pBuf, posits[tid][lvl - 1]);
-    pHi = genIndexLoad(builder, loc, pBuf, ADDI(posits[tid][lvl - 1], c1));
-  }
+  const SparseTensorLevel &stl = *lvls[tid][lvl];
+
+  Value p = lvl == 0 ? c0 : posits[tid][lvl - 1];
+  auto [pLo, pHi] = stl.peekRangeAt(builder, loc, p);
+
   // Fills out pIdxBuffer[tid][lvl][0] with [pLo, pHi]
   updateSlicePos(builder, loc, sPtrBuf, pLo, c0, SlicePosKind::kLo);
   updateSlicePos(builder, loc, sPtrBuf, pHi, c0, SlicePosKind::kHi);
@@ -1849,7 +1827,7 @@ void LoopEmitter::genResolvedSliceBegin(OpBuilder &builder, Location loc,
   // nonempty. though we assume that even on empty sparse tensors, a non-empty
   // ptr/idx buffer is allocated for each level so it would not cause OOB to
   // avoid generating a ifOp here.
-  Value minCrd = lvls[tid][lvl]->peekCrdAt(builder, loc, pLo);
+  Value minCrd = stl.peekCrdAt(builder, loc, pLo);
 
   // FIXME: We need the relative offset related to the base slice.
   Value absOffset = offsetFromMinCoord(builder, loc, minCrd, nxSz, isNonEmpty);
@@ -1879,7 +1857,7 @@ void LoopEmitter::genResolvedSliceBegin(OpBuilder &builder, Location loc,
 // }
 void LoopEmitter::genUnResolvedSliceBegin(OpBuilder &builder, Location loc,
                                           TensorId tid, Level lvl) {
-  Value c0 = C_IDX(0), c1 = C_IDX(1);
+  Value c0 = C_IDX(0);
   unsigned depth = levelReducedDep[tid][lvl];
   // The remaining slice size after reduction.
   Value remSz = sliceMeta[tid][lvl][depth + 1].first;
@@ -1929,17 +1907,14 @@ void LoopEmitter::genUnResolvedSliceBegin(OpBuilder &builder, Location loc,
 
   ValueRange result = genUnResolvedSliceTreeTraverse(
       builder, loc, tid, unResSlices, firstResLvl, reduc,
-      [this, c1, tid, lvl, sPtrBuf](OpBuilder &builder, Location loc, Value iv,
-                                    MutableArrayRef<Value> reduc) {
+      [this, tid, lvl, sPtrBuf](OpBuilder &builder, Location loc, Value iv,
+                                MutableArrayRef<Value> reduc) {
         Value &nonEmpty = reduc[0];
         Value &minCrd = reduc[1];
         Value &curTupleCnt = reduc[2];
 
-        Value pHi = ADDI(iv, c1);
-        // TODO: eliminate the cast upon feature complete.
-        Value pBuf = static_cast<CompressedLevel &>(*lvls[tid][lvl]).posBuffer;
-        Value sPLo = genIndexLoad(builder, loc, pBuf, iv);
-        Value sPHi = genIndexLoad(builder, loc, pBuf, pHi);
+        const SparseTensorLevel &stl = *lvls[tid][lvl];
+        auto [sPLo, sPHi] = stl.peekRangeAt(builder, loc, iv);
 
         // isNonEmpty = isNonEmpty || lvlNonEmpty, i.e., as long as there is
         // one non-empty lvl, the slice is non-empty.
@@ -1957,7 +1932,7 @@ void LoopEmitter::genUnResolvedSliceBegin(OpBuilder &builder, Location loc,
         // }
         OpBuilder::InsertionGuard guard(builder);
         builder.setInsertionPointToStart(ifNonEmpty.thenBlock());
-        Value curC = lvls[tid][lvl]->peekCrdAt(builder, loc, sPLo);
+        Value curC = stl.peekCrdAt(builder, loc, sPLo);
        Value isSmaller = CMPI(ult, curC, minCrd);
        Value newMin = SELECT(isSmaller, curC, minCrd);
        YIELD(newMin);
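
For reference, below is a minimal sketch of the SparseTensorLevel interface that every hunk above converges on. Only the class name and the peekCrdAt/peekRangeAt calls appear in this diff; the header layout, includes, and the per-format range rules in the comments are inferred from the code being deleted, so treat this as an illustration rather than the actual SparseTensorLevel.h.

// Sketch only: reconstructed from this diff, not copied from the real header.
#include "mlir/IR/Builders.h" // OpBuilder, Location, Value
#include <utility>

namespace mlir {
namespace sparse_tensor {

class SparseTensorLevel {
public:
  virtual ~SparseTensorLevel() = default;

  // Loads the coordinate stored at position `p` of this level.
  virtual Value peekCrdAt(OpBuilder &b, Location l, Value p) const = 0;

  // Returns the half-open position range {pLo, pHi} for the children of
  // parent position `p`, hiding the per-format address arithmetic that the
  // deleted branches open-coded:
  //   compressed:        {pos[p],     pos[p + 1]}
  //   loose compressed:  {pos[2 * p], pos[2 * p + 1]}
  //   2:4 structured:    {2 * p,      2 * p + 2}
  virtual std::pair<Value, Value> peekRangeAt(OpBuilder &b, Location l,
                                              Value p) const = 0;
};

} // namespace sparse_tensor
} // namespace mlir

Returning a std::pair matches both call styles used above: structured bindings (auto [pLo, pHi] = stl.peekRangeAt(...)) and assignment through std::tie(posits[tid][lvl], highs[tid][lvl]).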