@@ -167,14 +167,6 @@ static void updateSlicePosPtr(OpBuilder &builder, Location loc, Value sPosBuf,
167
167
Value pPtr) {
168
168
builder.create <memref::StoreOp>(loc, pPtr, sPosBuf , C_IDX (1 ));
169
169
}
170
- static Value loadSlicePosTupleNum (OpBuilder &builder, Location loc,
171
- Value sPosBuf ) {
172
- return genIndexLoad (builder, loc, sPosBuf , C_IDX (0 ));
173
- }
174
- static void updateSlicePosTupleNum (OpBuilder &builder, Location loc, Value num,
175
- Value sPosBuf ) {
176
- builder.create <memref::StoreOp>(loc, num, sPosBuf , C_IDX (0 ));
177
- }
178
170
179
171
// Gets and sets position values for slice-driven loops.
180
172
// Selects which position value updateSlicePos accesses: kLo and kHi are passed
// when storing the pLo and pHi bounds of a slice.
// NOTE(review): kNext presumably selects the following tuple's position - confirm.
enum class SlicePosKind { kLo , kHi , kNext };
@@ -405,7 +397,7 @@ void LoopEmitter::initialize(ValueRange ts, StringAttr loopTag, bool hasOutput,
405
397
sliceMeta[tid].assign (lvlRank, std::vector<std::pair<Value, unsigned >>());
406
398
sliceStack[tid].emplace_back (/* minCrd=*/ Value (),
407
399
/* offset=*/ Value (), /* isNonEmpty*/ Value (),
408
- std::nullopt, 0 );
400
+ /* posTupleNum= */ Value (), std::nullopt, 0 );
409
401
if (dimGetter && !isSynTensor (tid)) {
410
402
for (Level l = 0 ; l < lvlRank; l++) {
411
403
dependentLvlMap[tid][l] = dimGetter (tid, l);
@@ -1797,7 +1789,7 @@ ValueRange LoopEmitter::genUnResolvedSliceTreeTraverse(
1797
1789
unsigned depth = frontSlice.depth - 1 ;
1798
1790
Value offset = frontSlice.offset ;
1799
1791
Value sPtrBuf = slicePosBuffer[tid][firstLvl][depth];
1800
- Value mSz = loadSlicePosTupleNum (builder, loc, sPtrBuf ) ;
1792
+ Value mSz = frontSlice. posTupleNum ;
1801
1793
outerMost = builder.create <scf::ForOp>(
1802
1794
loc, c0, mSz , c1, innerArgs,
1803
1795
[this , tid, firstLvl, offset, sPtrBuf , &ip, &pos,
@@ -1908,7 +1900,7 @@ void LoopEmitter::genResolvedSliceBegin(OpBuilder &builder, Location loc,
1908
1900
// Dense slice begin is trivial.
1909
1901
sliceStack[tid].emplace_back (/* minCoord=*/ c0, /* offset=*/ c0,
1910
1902
/* nonEmpty=*/ constantI1 (builder, loc, true ),
1911
- lvl, /* depth=*/ 1 );
1903
+ c0, lvl, /* depth=*/ 1 );
1912
1904
return ;
1913
1905
}
1914
1906
auto [nxSz, stride] = sliceMeta[tid][lvl][1 ];
@@ -1924,12 +1916,13 @@ void LoopEmitter::genResolvedSliceBegin(OpBuilder &builder, Location loc,
1924
1916
pHi = genIndexLoad (builder, loc, positionsBuffers[tid][lvl],
1925
1917
ADDI (posits[tid][lvl - 1 ], c1));
1926
1918
}
1927
- // Fills out pIdxBuffer[tid][lvl][0] with [/*memSize =*/4, 0, pLo, pHi]
1928
- updateSlicePosTupleNum (builder, loc, c1, sPtrBuf );
1919
+ // Fills out pIdxBuffer[tid][lvl][0] with [0, pLo, pHi]
1929
1920
updateSlicePosPtr (builder, loc, sPtrBuf , c0);
1930
1921
updateSlicePos (builder, loc, sPtrBuf , pLo, c0, SlicePosKind::kLo );
1931
1922
updateSlicePos (builder, loc, sPtrBuf , pHi, c0, SlicePosKind::kHi );
1932
-
1923
+ // Slice over a resolved parent, we only need one pair of pos hi and lo to
1924
+ // specify the current slice.
1925
+ Value tupleNum = c1;
1933
1926
// This is a non-empty tensor if pLo < pHi.
1934
1927
Value isNonEmpty = CMPI (ult, pLo, pHi);
1935
1928
// On an ordered level, the minimal coord must be at the first position.
@@ -1941,7 +1934,7 @@ void LoopEmitter::genResolvedSliceBegin(OpBuilder &builder, Location loc,
1941
1934
1942
1935
// FIXME: We need the relative offset related to the base slice.
1943
1936
Value absOffset = offsetFromMinCoord (builder, loc, minCrd, nxSz, isNonEmpty);
1944
- sliceStack[tid].emplace_back (minCrd, absOffset, isNonEmpty, lvl,
1937
+ sliceStack[tid].emplace_back (minCrd, absOffset, isNonEmpty, tupleNum, lvl,
1945
1938
/* depth=*/ 1 );
1946
1939
}
1947
1940
@@ -1973,8 +1966,8 @@ void LoopEmitter::genUnResolvedSliceBegin(OpBuilder &builder, Location loc,
1973
1966
Value remSz = sliceMeta[tid][lvl][depth + 1 ].first ;
1974
1967
// Dense slice begin is trivial
1975
1968
if (isDenseLT (lvlTypes[tid][lvl])) {
1976
- sliceStack[tid].emplace_back (c0, c0, constantI1 (builder, loc, false ), lvl ,
1977
- depth + 1 );
1969
+ sliceStack[tid].emplace_back (c0, c0, constantI1 (builder, loc, false ), c0 ,
1970
+ lvl, depth + 1 );
1978
1971
return ;
1979
1972
}
1980
1973
@@ -2064,11 +2057,11 @@ void LoopEmitter::genUnResolvedSliceBegin(OpBuilder &builder, Location loc,
2064
2057
Value minCrd = result[1 ];
2065
2058
// One metadata [idx]; the tuple number is kept on the slice stack instead.
2066
2059
// TODO: Can use an SSA value for this metadata as well
2067
- updateSlicePosTupleNum (builder, loc, result[2 ], sPtrBuf );
2068
2060
updateSlicePosPtr (builder, loc, sPtrBuf , c0);
2069
2061
// FIXME: we need the relative offset related to the base slice.
2070
2062
Value absOffset = offsetFromMinCoord (builder, loc, minCrd, remSz, isNonEmpty);
2071
- sliceStack[tid].emplace_back (minCrd, absOffset, isNonEmpty, lvl, depth + 1 );
2063
+ sliceStack[tid].emplace_back (minCrd, absOffset, isNonEmpty, result[2 ], lvl,
2064
+ depth + 1 );
2072
2065
}
2073
2066
2074
2067
bool LoopEmitter::genSliceBegin (OpBuilder &builder, Location loc, TensorId tid,
@@ -2212,10 +2205,10 @@ LoopEmitter::genSliceNextInduction(OpBuilder &builder, Location loc,
2212
2205
// offset = minCrd - size + 1;
2213
2206
// }
2214
2207
builder.setInsertionPointToStart (&ifOp.getElseRegion ().front ());
2215
- reduc[2 ] = absOffset; // restore value.
2216
- Value mSz = loadSlicePosTupleNum (builder, loc, sPtrBuf ); // memSize
2217
- reduc[0 ] = lvlSizes[tid][lvl]; // next min coord
2218
- reduc[1 ] = constantI1 (builder, loc, false ); // isNonEmpty
2208
+ reduc[2 ] = absOffset; // restore value.
2209
+ Value mSz = info. posTupleNum ; // tuple number.
2210
+ reduc[0 ] = lvlSizes[tid][lvl]; // next min coord
2211
+ reduc[1 ] = constantI1 (builder, loc, false ); // isNonEmpty
2219
2212
auto loopArgs = static_cast <ValueRange>(reduc).drop_back ();
2220
2213
auto forOp = scf::buildLoopNest (
2221
2214
builder, loc, c0, mSz , c1, loopArgs,
0 commit comments