@@ -8304,57 +8304,35 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
8304
8304
});
8305
8305
// FIXME: this must be moved to TTI for better estimation.
8306
8306
unsigned EltsPerVector = getPartNumElems(VL.size(), NumParts);
8307
- auto CheckPerRegistersShuffle = [&](MutableArrayRef<int> Mask,
8308
- SmallVectorImpl<unsigned> &Indices)
8309
- -> std::optional<TTI::ShuffleKind> {
8307
+ auto CheckPerRegistersShuffle =
8308
+ [&](MutableArrayRef<int> Mask,
8309
+ SmallVector<int> Indices) -> std::optional<TTI::ShuffleKind> {
8310
8310
if (NumElts <= EltsPerVector)
8311
8311
return std::nullopt;
8312
- int OffsetReg0 =
8313
- alignDown(std::accumulate(Mask.begin(), Mask.end(), INT_MAX,
8314
- [](int S, int I) {
8315
- if (I == PoisonMaskElem)
8316
- return S;
8317
- return std::min(S, I);
8318
- }),
8319
- EltsPerVector);
8320
- int OffsetReg1 = OffsetReg0;
8321
8312
DenseSet<int> RegIndices;
8322
8313
// Check whether the mask permutes elements of the same single/2 input vectors.
8323
8314
TTI::ShuffleKind ShuffleKind = TTI::SK_PermuteSingleSrc;
8324
8315
int FirstRegId = -1;
8325
- Indices.assign(1, OffsetReg0 );
8326
- for (auto [Pos, I] : enumerate( Mask) ) {
8316
+ Indices.assign(1, -1 );
8317
+ for (int &I : Mask) {
8327
8318
if (I == PoisonMaskElem)
8328
8319
continue;
8329
- int Idx = I - OffsetReg0;
8330
- int RegId =
8331
- (Idx / NumElts) * NumParts + (Idx % NumElts) / EltsPerVector;
8320
+ int RegId = (I / NumElts) * NumParts + (I % NumElts) / EltsPerVector;
8332
8321
if (FirstRegId < 0)
8333
8322
FirstRegId = RegId;
8334
8323
RegIndices.insert(RegId);
8335
8324
if (RegIndices.size() > 2)
8336
8325
return std::nullopt;
8337
8326
if (RegIndices.size() == 2) {
8338
8327
ShuffleKind = TTI::SK_PermuteTwoSrc;
8339
- if (Indices.size() == 1) {
8340
- OffsetReg1 = alignDown(
8341
- std::accumulate(
8342
- std::next(Mask.begin(), Pos), Mask.end(), INT_MAX,
8343
- [&](int S, int I) {
8344
- if (I == PoisonMaskElem)
8345
- return S;
8346
- int RegId = ((I - OffsetReg0) / NumElts) * NumParts +
8347
- ((I - OffsetReg0) % NumElts) / EltsPerVector;
8348
- if (RegId == FirstRegId)
8349
- return S;
8350
- return std::min(S, I);
8351
- }),
8352
- EltsPerVector);
8353
- Indices.push_back(OffsetReg1);
8354
- }
8355
- Idx = I - OffsetReg1;
8328
+ if (Indices.size() == 1)
8329
+ Indices.push_back(-1);
8356
8330
}
8357
- I = (Idx % NumElts) % EltsPerVector +
8331
+ if (RegId == FirstRegId)
8332
+ Indices.front() = I % NumElts;
8333
+ else
8334
+ Indices.back() = I % NumElts;
8335
+ I = (I % NumElts) % EltsPerVector +
8358
8336
(RegId == FirstRegId ? 0 : EltsPerVector);
8359
8337
}
8360
8338
return ShuffleKind;
@@ -8371,7 +8349,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
8371
8349
Part * EltsPerVector, getNumElems(Mask.size(), EltsPerVector, Part));
8372
8350
SmallVector<int> SubMask(EltsPerVector, PoisonMaskElem);
8373
8351
copy(MaskSlice, SubMask.begin());
8374
- SmallVector<unsigned, 2 > Indices;
8352
+ SmallVector<int > Indices;
8375
8353
std::optional<TTI::ShuffleKind> RegShuffleKind =
8376
8354
CheckPerRegistersShuffle(SubMask, Indices);
8377
8355
if (!RegShuffleKind) {
@@ -8389,21 +8367,12 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
8389
8367
FixedVectorType::get(ScalarTy, EltsPerVector),
8390
8368
SubMask);
8391
8369
}
8392
- for (unsigned Idx : Indices) {
8370
+ for (int Idx : Indices) {
8393
8371
Cost += ::getShuffleCost(TTI, TTI::SK_ExtractSubvector,
8394
8372
FixedVectorType::get(ScalarTy, NumElts),
8395
8373
std::nullopt, CostKind, Idx,
8396
8374
FixedVectorType::get(ScalarTy, EltsPerVector));
8397
8375
}
8398
- // Second attempt to check, if just a permute is better estimated than
8399
- // subvector extract.
8400
- SubMask.assign(NumElts, PoisonMaskElem);
8401
- copy(MaskSlice, SubMask.begin());
8402
- InstructionCost OriginalCost =
8403
- ::getShuffleCost(TTI, *ShuffleKinds[Part],
8404
- FixedVectorType::get(ScalarTy, NumElts), SubMask);
8405
- if (OriginalCost < Cost)
8406
- Cost = OriginalCost;
8407
8376
}
8408
8377
return Cost;
8409
8378
}
0 commit comments