Skip to content

Commit 69389a3

Browse files
committed
[IA][RISCV] Add support for vp.load/vp.store with shufflevector
1 parent 1cec5ff commit 69389a3

11 files changed

+661
-99
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

+5-4
Original file line numberDiff line numberDiff line change
@@ -3157,11 +3157,11 @@ class TargetLoweringBase {
31573157
/// Lower an interleaved load to target specific intrinsics. Return
31583158
/// true on success.
31593159
///
3160-
/// \p LI is the vector load instruction.
3160+
/// \p LoadOp is a vector load or vp.load instruction.
31613161
/// \p Shuffles is the shufflevector list to DE-interleave the loaded vector.
31623162
/// \p Indices is the corresponding indices for each shufflevector.
31633163
/// \p Factor is the interleave factor.
3164-
virtual bool lowerInterleavedLoad(LoadInst *LI,
3164+
virtual bool lowerInterleavedLoad(Instruction *LoadOp,
31653165
ArrayRef<ShuffleVectorInst *> Shuffles,
31663166
ArrayRef<unsigned> Indices,
31673167
unsigned Factor) const {
@@ -3171,10 +3171,11 @@ class TargetLoweringBase {
31713171
/// Lower an interleaved store to target specific intrinsics. Return
31723172
/// true on success.
31733173
///
3174-
/// \p SI is the vector store instruction.
3174+
/// \p StoreOp is a vector store or vp.store instruction.
31753175
/// \p SVI is the shufflevector to RE-interleave the stored vector.
31763176
/// \p Factor is the interleave factor.
3177-
virtual bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
3177+
virtual bool lowerInterleavedStore(Instruction *StoreOp,
3178+
ShuffleVectorInst *SVI,
31783179
unsigned Factor) const {
31793180
return false;
31803181
}

llvm/lib/CodeGen/InterleavedAccessPass.cpp

+150-27
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
//===----------------------------------------------------------------------===//
4646

4747
#include "llvm/ADT/ArrayRef.h"
48+
#include "llvm/ADT/BitVector.h"
4849
#include "llvm/ADT/DenseMap.h"
4950
#include "llvm/ADT/SetVector.h"
5051
#include "llvm/ADT/SmallVector.h"
@@ -100,11 +101,11 @@ class InterleavedAccessImpl {
100101
unsigned MaxFactor = 0u;
101102

102103
/// Transform an interleaved load into target specific intrinsics.
103-
bool lowerInterleavedLoad(LoadInst *LI,
104+
bool lowerInterleavedLoad(Instruction *LoadOp,
104105
SmallSetVector<Instruction *, 32> &DeadInsts);
105106

106107
/// Transform an interleaved store into target specific intrinsics.
107-
bool lowerInterleavedStore(StoreInst *SI,
108+
bool lowerInterleavedStore(Instruction *StoreOp,
108109
SmallSetVector<Instruction *, 32> &DeadInsts);
109110

110111
/// Transform a load and a deinterleave intrinsic into target specific
@@ -131,7 +132,7 @@ class InterleavedAccessImpl {
131132
/// made.
132133
bool replaceBinOpShuffles(ArrayRef<ShuffleVectorInst *> BinOpShuffles,
133134
SmallVectorImpl<ShuffleVectorInst *> &Shuffles,
134-
LoadInst *LI);
135+
Instruction *LI);
135136
};
136137

137138
class InterleavedAccess : public FunctionPass {
@@ -250,10 +251,23 @@ static bool isReInterleaveMask(ShuffleVectorInst *SVI, unsigned &Factor,
250251
}
251252

252253
bool InterleavedAccessImpl::lowerInterleavedLoad(
253-
LoadInst *LI, SmallSetVector<Instruction *, 32> &DeadInsts) {
254-
if (!LI->isSimple() || isa<ScalableVectorType>(LI->getType()))
254+
Instruction *LoadOp, SmallSetVector<Instruction *, 32> &DeadInsts) {
255+
if (isa<ScalableVectorType>(LoadOp->getType()))
255256
return false;
256257

258+
if (auto *LI = dyn_cast<LoadInst>(LoadOp)) {
259+
if (!LI->isSimple())
260+
return false;
261+
} else if (auto *VPLoad = dyn_cast<VPIntrinsic>(LoadOp)) {
262+
assert(VPLoad->getIntrinsicID() == Intrinsic::vp_load);
263+
// Require a constant mask and evl.
264+
if (!isa<ConstantVector>(VPLoad->getArgOperand(1)) ||
265+
!isa<ConstantInt>(VPLoad->getArgOperand(2)))
266+
return false;
267+
} else {
268+
llvm_unreachable("unsupported load operation");
269+
}
270+
257271
// Check if all users of this load are shufflevectors. If we encounter any
258272
// users that are extractelement instructions or binary operators, we save
259273
// them to later check if they can be modified to extract from one of the
@@ -265,7 +279,7 @@ bool InterleavedAccessImpl::lowerInterleavedLoad(
265279
// binop are the same load.
266280
SmallSetVector<ShuffleVectorInst *, 4> BinOpShuffles;
267281

268-
for (auto *User : LI->users()) {
282+
for (auto *User : LoadOp->users()) {
269283
auto *Extract = dyn_cast<ExtractElementInst>(User);
270284
if (Extract && isa<ConstantInt>(Extract->getIndexOperand())) {
271285
Extracts.push_back(Extract);
@@ -294,13 +308,31 @@ bool InterleavedAccessImpl::lowerInterleavedLoad(
294308
unsigned Factor, Index;
295309

296310
unsigned NumLoadElements =
297-
cast<FixedVectorType>(LI->getType())->getNumElements();
311+
cast<FixedVectorType>(LoadOp->getType())->getNumElements();
298312
auto *FirstSVI = Shuffles.size() > 0 ? Shuffles[0] : BinOpShuffles[0];
299313
// Check if the first shufflevector is DE-interleave shuffle.
300314
if (!isDeInterleaveMask(FirstSVI->getShuffleMask(), Factor, Index, MaxFactor,
301315
NumLoadElements))
302316
return false;
303317

318+
// If this is a vp.load, record its mask (NOT shuffle mask).
319+
BitVector MaskedIndices(NumLoadElements);
320+
if (auto *VPLoad = dyn_cast<VPIntrinsic>(LoadOp)) {
321+
auto *Mask = cast<ConstantVector>(VPLoad->getArgOperand(1));
322+
assert(cast<FixedVectorType>(Mask->getType())->getNumElements() ==
323+
NumLoadElements);
324+
if (auto *Splat = Mask->getSplatValue()) {
325+
// All-zeros mask, bail out early.
326+
if (Splat->isZeroValue())
327+
return false;
328+
} else {
329+
for (unsigned i = 0U; i < NumLoadElements; ++i) {
330+
if (Mask->getAggregateElement(i)->isZeroValue())
331+
MaskedIndices.set(i);
332+
}
333+
}
334+
}
335+
304336
// Holds the corresponding index for each DE-interleave shuffle.
305337
SmallVector<unsigned, 4> Indices;
306338

@@ -327,9 +359,9 @@ bool InterleavedAccessImpl::lowerInterleavedLoad(
327359

328360
assert(Shuffle->getShuffleMask().size() <= NumLoadElements);
329361

330-
if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(0) == LI)
362+
if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(0) == LoadOp)
331363
Indices.push_back(Index);
332-
if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(1) == LI)
364+
if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(1) == LoadOp)
333365
Indices.push_back(Index);
334366
}
335367

@@ -339,25 +371,61 @@ bool InterleavedAccessImpl::lowerInterleavedLoad(
339371
return false;
340372

341373
bool BinOpShuffleChanged =
342-
replaceBinOpShuffles(BinOpShuffles.getArrayRef(), Shuffles, LI);
374+
replaceBinOpShuffles(BinOpShuffles.getArrayRef(), Shuffles, LoadOp);
375+
376+
// Check if we extract only the unmasked elements.
377+
if (MaskedIndices.any()) {
378+
if (any_of(Shuffles, [&](const auto *Shuffle) {
379+
ArrayRef<int> ShuffleMask = Shuffle->getShuffleMask();
380+
for (int Idx : ShuffleMask) {
381+
if (Idx < 0)
382+
continue;
383+
if (MaskedIndices.test(unsigned(Idx)))
384+
return true;
385+
}
386+
return false;
387+
})) {
388+
LLVM_DEBUG(dbgs() << "IA: trying to extract a masked element through "
389+
<< "shufflevector\n");
390+
return false;
391+
}
392+
}
393+
// Check if we extract only the elements within evl.
394+
if (auto *VPLoad = dyn_cast<VPIntrinsic>(LoadOp)) {
395+
uint64_t EVL = cast<ConstantInt>(VPLoad->getArgOperand(2))->getZExtValue();
396+
if (any_of(Shuffles, [&](const auto *Shuffle) {
397+
ArrayRef<int> ShuffleMask = Shuffle->getShuffleMask();
398+
for (int Idx : ShuffleMask) {
399+
if (Idx < 0)
400+
continue;
401+
if (unsigned(Idx) >= EVL)
402+
return true;
403+
}
404+
return false;
405+
})) {
406+
LLVM_DEBUG(
407+
dbgs() << "IA: trying to extract an element out of EVL range\n");
408+
return false;
409+
}
410+
}
343411

344-
LLVM_DEBUG(dbgs() << "IA: Found an interleaved load: " << *LI << "\n");
412+
LLVM_DEBUG(dbgs() << "IA: Found an interleaved load: " << *LoadOp << "\n");
345413

346414
// Try to create target specific intrinsics to replace the load and shuffles.
347-
if (!TLI->lowerInterleavedLoad(LI, Shuffles, Indices, Factor)) {
415+
if (!TLI->lowerInterleavedLoad(LoadOp, Shuffles, Indices, Factor)) {
348416
// If Extracts is not empty, tryReplaceExtracts made changes earlier.
349417
return !Extracts.empty() || BinOpShuffleChanged;
350418
}
351419

352420
DeadInsts.insert_range(Shuffles);
353421

354-
DeadInsts.insert(LI);
422+
DeadInsts.insert(LoadOp);
355423
return true;
356424
}
357425

358426
bool InterleavedAccessImpl::replaceBinOpShuffles(
359427
ArrayRef<ShuffleVectorInst *> BinOpShuffles,
360-
SmallVectorImpl<ShuffleVectorInst *> &Shuffles, LoadInst *LI) {
428+
SmallVectorImpl<ShuffleVectorInst *> &Shuffles, Instruction *LoadOp) {
361429
for (auto *SVI : BinOpShuffles) {
362430
BinaryOperator *BI = cast<BinaryOperator>(SVI->getOperand(0));
363431
Type *BIOp0Ty = BI->getOperand(0)->getType();
@@ -380,9 +448,9 @@ bool InterleavedAccessImpl::replaceBinOpShuffles(
380448
<< "\n With : " << *NewSVI1 << "\n And : "
381449
<< *NewSVI2 << "\n And : " << *NewBI << "\n");
382450
RecursivelyDeleteTriviallyDeadInstructions(SVI);
383-
if (NewSVI1->getOperand(0) == LI)
451+
if (NewSVI1->getOperand(0) == LoadOp)
384452
Shuffles.push_back(NewSVI1);
385-
if (NewSVI2->getOperand(0) == LI)
453+
if (NewSVI2->getOperand(0) == LoadOp)
386454
Shuffles.push_back(NewSVI2);
387455
}
388456

@@ -454,27 +522,79 @@ bool InterleavedAccessImpl::tryReplaceExtracts(
454522
}
455523

456524
bool InterleavedAccessImpl::lowerInterleavedStore(
457-
StoreInst *SI, SmallSetVector<Instruction *, 32> &DeadInsts) {
458-
if (!SI->isSimple())
459-
return false;
525+
Instruction *StoreOp, SmallSetVector<Instruction *, 32> &DeadInsts) {
526+
Value *StoredValue;
527+
if (auto *SI = dyn_cast<StoreInst>(StoreOp)) {
528+
if (!SI->isSimple())
529+
return false;
530+
StoredValue = SI->getValueOperand();
531+
} else if (auto *VPStore = dyn_cast<VPIntrinsic>(StoreOp)) {
532+
assert(VPStore->getIntrinsicID() == Intrinsic::vp_store);
533+
// Require a constant mask and evl.
534+
if (!isa<ConstantVector>(VPStore->getArgOperand(2)) ||
535+
!isa<ConstantInt>(VPStore->getArgOperand(3)))
536+
return false;
537+
StoredValue = VPStore->getArgOperand(0);
538+
} else {
539+
llvm_unreachable("unsupported store operation");
540+
}
460541

461-
auto *SVI = dyn_cast<ShuffleVectorInst>(SI->getValueOperand());
542+
auto *SVI = dyn_cast<ShuffleVectorInst>(StoredValue);
462543
if (!SVI || !SVI->hasOneUse() || isa<ScalableVectorType>(SVI->getType()))
463544
return false;
464545

546+
unsigned NumStoredElements =
547+
cast<FixedVectorType>(SVI->getType())->getNumElements();
548+
// If this is a vp.store, record its mask (NOT shuffle mask).
549+
BitVector MaskedIndices(NumStoredElements);
550+
if (auto *VPStore = dyn_cast<VPIntrinsic>(StoreOp)) {
551+
auto *Mask = cast<ConstantVector>(VPStore->getArgOperand(2));
552+
assert(cast<FixedVectorType>(Mask->getType())->getNumElements() ==
553+
NumStoredElements);
554+
if (auto *Splat = Mask->getSplatValue()) {
555+
// All-zeros mask, bail out early.
556+
if (Splat->isZeroValue())
557+
return false;
558+
} else {
559+
for (unsigned i = 0U; i < NumStoredElements; ++i) {
560+
if (Mask->getAggregateElement(i)->isZeroValue())
561+
MaskedIndices.set(i);
562+
}
563+
}
564+
}
565+
465566
// Check if the shufflevector is RE-interleave shuffle.
466567
unsigned Factor;
467568
if (!isReInterleaveMask(SVI, Factor, MaxFactor))
468569
return false;
469570

470-
LLVM_DEBUG(dbgs() << "IA: Found an interleaved store: " << *SI << "\n");
571+
// Check if we store only the unmasked elements.
572+
if (MaskedIndices.any()) {
573+
if (any_of(SVI->getShuffleMask(), [&](int Idx) {
574+
return Idx >= 0 && MaskedIndices.test(unsigned(Idx));
575+
})) {
576+
LLVM_DEBUG(dbgs() << "IA: trying to store a masked element\n");
577+
return false;
578+
}
579+
}
580+
// Check if we store only the elements within evl.
581+
if (auto *VPStore = dyn_cast<VPIntrinsic>(StoreOp)) {
582+
uint64_t EVL = cast<ConstantInt>(VPStore->getArgOperand(3))->getZExtValue();
583+
if (any_of(SVI->getShuffleMask(),
584+
[&](int Idx) { return Idx >= 0 && unsigned(Idx) >= EVL; })) {
585+
LLVM_DEBUG(dbgs() << "IA: trying to store an element out of EVL range\n");
586+
return false;
587+
}
588+
}
589+
590+
LLVM_DEBUG(dbgs() << "IA: Found an interleaved store: " << *StoreOp << "\n");
471591

472592
// Try to create target specific intrinsics to replace the store and shuffle.
473-
if (!TLI->lowerInterleavedStore(SI, SVI, Factor))
593+
if (!TLI->lowerInterleavedStore(StoreOp, SVI, Factor))
474594
return false;
475595

476596
// Already have a new target specific interleaved store. Erase the old store.
477-
DeadInsts.insert(SI);
597+
DeadInsts.insert(StoreOp);
478598
DeadInsts.insert(SVI);
479599
return true;
480600
}
@@ -766,12 +886,15 @@ bool InterleavedAccessImpl::runOnFunction(Function &F) {
766886
SmallSetVector<Instruction *, 32> DeadInsts;
767887
bool Changed = false;
768888

889+
using namespace PatternMatch;
769890
for (auto &I : instructions(F)) {
770-
if (auto *LI = dyn_cast<LoadInst>(&I))
771-
Changed |= lowerInterleavedLoad(LI, DeadInsts);
891+
if (match(&I, m_CombineOr(m_Load(m_Value()),
892+
m_Intrinsic<Intrinsic::vp_load>())))
893+
Changed |= lowerInterleavedLoad(&I, DeadInsts);
772894

773-
if (auto *SI = dyn_cast<StoreInst>(&I))
774-
Changed |= lowerInterleavedStore(SI, DeadInsts);
895+
if (match(&I, m_CombineOr(m_Store(m_Value(), m_Value()),
896+
m_Intrinsic<Intrinsic::vp_store>())))
897+
Changed |= lowerInterleavedStore(&I, DeadInsts);
775898

776899
if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
777900
// At present, we only have intrinsics to represent (de)interleaving

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

+10-2
Original file line numberDiff line numberDiff line change
@@ -17176,14 +17176,18 @@ static Function *getStructuredStoreFunction(Module *M, unsigned Factor,
1717617176
/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
1717717177
/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
1717817178
bool AArch64TargetLowering::lowerInterleavedLoad(
17179-
LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
17179+
Instruction *LoadOp, ArrayRef<ShuffleVectorInst *> Shuffles,
1718017180
ArrayRef<unsigned> Indices, unsigned Factor) const {
1718117181
assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
1718217182
"Invalid interleave factor");
1718317183
assert(!Shuffles.empty() && "Empty shufflevector input");
1718417184
assert(Shuffles.size() == Indices.size() &&
1718517185
"Unmatched number of shufflevectors and indices");
1718617186

17187+
auto *LI = dyn_cast<LoadInst>(LoadOp);
17188+
if (!LI)
17189+
return false;
17190+
1718717191
const DataLayout &DL = LI->getDataLayout();
1718817192

1718917193
VectorType *VTy = Shuffles[0]->getType();
@@ -17359,13 +17363,17 @@ bool hasNearbyPairedStore(Iter It, Iter End, Value *Ptr, const DataLayout &DL) {
1735917363
/// %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> v1, <32, 33, 34, 35>
1736017364
/// %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> v1, <16, 17, 18, 19>
1736117365
/// call void llvm.aarch64.neon.st3(%sub.v0, %sub.v1, %sub.v2, %ptr)
17362-
bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
17366+
bool AArch64TargetLowering::lowerInterleavedStore(Instruction *StoreOp,
1736317367
ShuffleVectorInst *SVI,
1736417368
unsigned Factor) const {
1736517369

1736617370
assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
1736717371
"Invalid interleave factor");
1736817372

17373+
auto *SI = dyn_cast<StoreInst>(StoreOp);
17374+
if (!SI)
17375+
return false;
17376+
1736917377
auto *VecTy = cast<FixedVectorType>(SVI->getType());
1737017378
assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
1737117379

llvm/lib/Target/AArch64/AArch64ISelLowering.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -716,11 +716,11 @@ class AArch64TargetLowering : public TargetLowering {
716716

717717
unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
718718

719-
bool lowerInterleavedLoad(LoadInst *LI,
719+
bool lowerInterleavedLoad(Instruction *LoadOp,
720720
ArrayRef<ShuffleVectorInst *> Shuffles,
721721
ArrayRef<unsigned> Indices,
722722
unsigned Factor) const override;
723-
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
723+
bool lowerInterleavedStore(Instruction *StoreOp, ShuffleVectorInst *SVI,
724724
unsigned Factor) const override;
725725

726726
bool lowerDeinterleaveIntrinsicToLoad(

0 commit comments

Comments (0)