[RISCV] Remove riscv.segN.load/store in favor of their mask variants #137045
base: main
Conversation
@llvm/pr-subscribers-backend-risc-v
Author: Min-Yih Hsu (mshockwave)
Changes
RISCVVectorPeepholePass would replace instructions with an all-ones mask with their unmasked variants, so there isn't really a point in keeping separate versions of the intrinsics. Note that riscv.segN.load/store.mask does not take a pointer type (i.e. address space) as part of its overloading type signature, because RISC-V doesn't really use address spaces other than the default one. This PR stacks on top of #135445.
Patch is 116.66 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/137045.diff
12 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 4f2f202f94841..6eaa1bae1da97 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3179,15 +3179,15 @@ class TargetLoweringBase {
return false;
}
- /// Lower an interleaved load to target specific intrinsics. Return
+ /// Lower a deinterleaved load to target specific intrinsics. Return
/// true on success.
///
/// \p Load is a vp.load instruction.
/// \p Mask is a mask value
/// \p DeinterleaveRes is a list of deinterleaved results.
virtual bool
- lowerDeinterleavedIntrinsicToVPLoad(VPIntrinsic *Load, Value *Mask,
- ArrayRef<Value *> DeinterleaveRes) const {
+ lowerDeinterleavedVPLoad(VPIntrinsic *Load, Value *Mask,
+ ArrayRef<Value *> DeinterleaveRes) const {
return false;
}
@@ -3197,9 +3197,8 @@ class TargetLoweringBase {
/// \p Store is the vp.store instruction.
/// \p Mask is a mask value
/// \p InterleaveOps is a list of values being interleaved.
- virtual bool
- lowerInterleavedIntrinsicToVPStore(VPIntrinsic *Store, Value *Mask,
- ArrayRef<Value *> InterleaveOps) const {
+ virtual bool lowerInterleavedVPStore(VPIntrinsic *Store, Value *Mask,
+ ArrayRef<Value *> InterleaveOps) const {
return false;
}
diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index 99cb557d9aa09..18b2883eb00e7 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -1704,19 +1704,27 @@ let TargetPrefix = "riscv" in {
}
// Segment loads/stores for fixed vectors.
+ // Note: we only have the masked variants because RISCVVectorPeephole
+ // would lower any instructions with all-ones mask into unmasked version
+ // anyway.
foreach nf = [2, 3, 4, 5, 6, 7, 8] in {
- def int_riscv_seg # nf # _load
+ // Input: (pointer, mask, vl)
+ def int_riscv_seg # nf # _load_mask
: DefaultAttrsIntrinsic<!listconcat([llvm_anyvector_ty],
!listsplat(LLVMMatchType<0>,
!add(nf, -1))),
- [llvm_anyptr_ty, llvm_anyint_ty],
+ [llvm_ptr_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_anyint_ty],
[NoCapture<ArgIndex<0>>, IntrReadMem]>;
- def int_riscv_seg # nf # _store
+
+ // Input: (<stored values>..., pointer, mask, vl)
+ def int_riscv_seg # nf # _store_mask
: DefaultAttrsIntrinsic<[],
!listconcat([llvm_anyvector_ty],
!listsplat(LLVMMatchType<0>,
!add(nf, -1)),
- [llvm_anyptr_ty, llvm_anyint_ty]),
+ [llvm_ptr_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_anyint_ty]),
[NoCapture<ArgIndex<nf>>, IntrWriteMem]>;
}
diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index 9e47510e9cd1a..66c1cd98ef602 100644
--- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -100,11 +100,11 @@ class InterleavedAccessImpl {
unsigned MaxFactor = 0u;
/// Transform an interleaved load into target specific intrinsics.
- bool lowerInterleavedLoad(LoadInst *LI,
+ bool lowerInterleavedLoad(Instruction *LoadOp,
SmallSetVector<Instruction *, 32> &DeadInsts);
/// Transform an interleaved store into target specific intrinsics.
- bool lowerInterleavedStore(StoreInst *SI,
+ bool lowerInterleavedStore(Instruction *StoreOp,
SmallSetVector<Instruction *, 32> &DeadInsts);
/// Transform a load and a deinterleave intrinsic into target specific
@@ -131,7 +131,7 @@ class InterleavedAccessImpl {
/// made.
bool replaceBinOpShuffles(ArrayRef<ShuffleVectorInst *> BinOpShuffles,
SmallVectorImpl<ShuffleVectorInst *> &Shuffles,
- LoadInst *LI);
+ Instruction *LI);
};
class InterleavedAccess : public FunctionPass {
@@ -249,11 +249,43 @@ static bool isReInterleaveMask(ShuffleVectorInst *SVI, unsigned &Factor,
return false;
}
+/// Return true if it's an interleaving mask. For instance, 111000111000 is
+/// interleaved from three 1010 masks. \p SubMask returns the mask of individual
+/// lane.
+static bool isInterleavedConstantMask(unsigned Factor, ConstantVector *Mask,
+ SmallVectorImpl<Constant *> &LaneMask) {
+ unsigned LaneMaskLen = LaneMask.size();
+ if (auto *Splat = Mask->getSplatValue()) {
+ std::fill(LaneMask.begin(), LaneMask.end(), Splat);
+ } else {
+ for (unsigned Idx = 0U, N = LaneMaskLen * Factor; Idx < N; ++Idx) {
+ Constant *Ref = Mask->getAggregateElement(alignDown(Idx, Factor));
+ if (Ref != Mask->getAggregateElement(Idx))
+ return false;
+ LaneMask[Idx / Factor] = Ref;
+ }
+ }
+
+ return true;
+}
+
bool InterleavedAccessImpl::lowerInterleavedLoad(
- LoadInst *LI, SmallSetVector<Instruction *, 32> &DeadInsts) {
- if (!LI->isSimple() || isa<ScalableVectorType>(LI->getType()))
+ Instruction *LoadOp, SmallSetVector<Instruction *, 32> &DeadInsts) {
+ if (isa<ScalableVectorType>(LoadOp->getType()))
return false;
+ if (auto *LI = dyn_cast<LoadInst>(LoadOp)) {
+ if (!LI->isSimple())
+ return false;
+ } else if (auto *VPLoad = dyn_cast<VPIntrinsic>(LoadOp)) {
+ assert(VPLoad->getIntrinsicID() == Intrinsic::vp_load);
+ // Require a constant mask and evl.
+ if (!isa<ConstantVector>(VPLoad->getArgOperand(1)))
+ return false;
+ } else {
+ llvm_unreachable("unsupported load operation");
+ }
+
// Check if all users of this load are shufflevectors. If we encounter any
// users that are extractelement instructions or binary operators, we save
// them to later check if they can be modified to extract from one of the
@@ -265,7 +297,7 @@ bool InterleavedAccessImpl::lowerInterleavedLoad(
// binop are the same load.
SmallSetVector<ShuffleVectorInst *, 4> BinOpShuffles;
- for (auto *User : LI->users()) {
+ for (auto *User : LoadOp->users()) {
auto *Extract = dyn_cast<ExtractElementInst>(User);
if (Extract && isa<ConstantInt>(Extract->getIndexOperand())) {
Extracts.push_back(Extract);
@@ -294,7 +326,7 @@ bool InterleavedAccessImpl::lowerInterleavedLoad(
unsigned Factor, Index;
unsigned NumLoadElements =
- cast<FixedVectorType>(LI->getType())->getNumElements();
+ cast<FixedVectorType>(LoadOp->getType())->getNumElements();
auto *FirstSVI = Shuffles.size() > 0 ? Shuffles[0] : BinOpShuffles[0];
// Check if the first shufflevector is DE-interleave shuffle.
if (!isDeInterleaveMask(FirstSVI->getShuffleMask(), Factor, Index, MaxFactor,
@@ -327,9 +359,9 @@ bool InterleavedAccessImpl::lowerInterleavedLoad(
assert(Shuffle->getShuffleMask().size() <= NumLoadElements);
- if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(0) == LI)
+ if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(0) == LoadOp)
Indices.push_back(Index);
- if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(1) == LI)
+ if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(1) == LoadOp)
Indices.push_back(Index);
}
@@ -339,25 +371,48 @@ bool InterleavedAccessImpl::lowerInterleavedLoad(
return false;
bool BinOpShuffleChanged =
- replaceBinOpShuffles(BinOpShuffles.getArrayRef(), Shuffles, LI);
-
- LLVM_DEBUG(dbgs() << "IA: Found an interleaved load: " << *LI << "\n");
+ replaceBinOpShuffles(BinOpShuffles.getArrayRef(), Shuffles, LoadOp);
+
+ // Check if the de-interleaved vp.load masks are the same.
+ unsigned ShuffleMaskLen = Shuffles[0]->getShuffleMask().size();
+ SmallVector<Constant *, 8> LaneMask(ShuffleMaskLen, nullptr);
+ if (auto *VPLoad = dyn_cast<VPIntrinsic>(LoadOp)) {
+ if (!isInterleavedConstantMask(
+ Factor, cast<ConstantVector>(VPLoad->getArgOperand(1)), LaneMask))
+ return false;
+ }
- // Try to create target specific intrinsics to replace the load and shuffles.
- if (!TLI->lowerInterleavedLoad(LI, Shuffles, Indices, Factor)) {
- // If Extracts is not empty, tryReplaceExtracts made changes earlier.
- return !Extracts.empty() || BinOpShuffleChanged;
+ LLVM_DEBUG(dbgs() << "IA: Found an interleaved load: " << *LoadOp << "\n");
+
+ if (auto *VPLoad = dyn_cast<VPIntrinsic>(LoadOp)) {
+ auto *MaskVec = ConstantVector::get(LaneMask);
+ // Sometimes the number of Shuffles might be less than Factor, we have to
+ // fill the gaps with null. Also, lowerDeinterleavedVPLoad
+ // expects them to be sorted.
+ SmallVector<Value *, 4> ShuffleValues(Factor, nullptr);
+ for (auto [Idx, ShuffleMaskIdx] : enumerate(Indices))
+ ShuffleValues[ShuffleMaskIdx] = Shuffles[Idx];
+ if (!TLI->lowerDeinterleavedVPLoad(VPLoad, MaskVec, ShuffleValues))
+ // If Extracts is not empty, tryReplaceExtracts made changes earlier.
+ return !Extracts.empty() || BinOpShuffleChanged;
+ } else {
+ // Try to create target specific intrinsics to replace the load and
+ // shuffles.
+ if (!TLI->lowerInterleavedLoad(cast<LoadInst>(LoadOp), Shuffles, Indices,
+ Factor))
+ // If Extracts is not empty, tryReplaceExtracts made changes earlier.
+ return !Extracts.empty() || BinOpShuffleChanged;
}
DeadInsts.insert_range(Shuffles);
- DeadInsts.insert(LI);
+ DeadInsts.insert(LoadOp);
return true;
}
bool InterleavedAccessImpl::replaceBinOpShuffles(
ArrayRef<ShuffleVectorInst *> BinOpShuffles,
- SmallVectorImpl<ShuffleVectorInst *> &Shuffles, LoadInst *LI) {
+ SmallVectorImpl<ShuffleVectorInst *> &Shuffles, Instruction *LoadOp) {
for (auto *SVI : BinOpShuffles) {
BinaryOperator *BI = cast<BinaryOperator>(SVI->getOperand(0));
Type *BIOp0Ty = BI->getOperand(0)->getType();
@@ -380,9 +435,9 @@ bool InterleavedAccessImpl::replaceBinOpShuffles(
<< "\n With : " << *NewSVI1 << "\n And : "
<< *NewSVI2 << "\n And : " << *NewBI << "\n");
RecursivelyDeleteTriviallyDeadInstructions(SVI);
- if (NewSVI1->getOperand(0) == LI)
+ if (NewSVI1->getOperand(0) == LoadOp)
Shuffles.push_back(NewSVI1);
- if (NewSVI2->getOperand(0) == LI)
+ if (NewSVI2->getOperand(0) == LoadOp)
Shuffles.push_back(NewSVI2);
}
@@ -454,27 +509,78 @@ bool InterleavedAccessImpl::tryReplaceExtracts(
}
bool InterleavedAccessImpl::lowerInterleavedStore(
- StoreInst *SI, SmallSetVector<Instruction *, 32> &DeadInsts) {
- if (!SI->isSimple())
- return false;
+ Instruction *StoreOp, SmallSetVector<Instruction *, 32> &DeadInsts) {
+ Value *StoredValue;
+ if (auto *SI = dyn_cast<StoreInst>(StoreOp)) {
+ if (!SI->isSimple())
+ return false;
+ StoredValue = SI->getValueOperand();
+ } else if (auto *VPStore = dyn_cast<VPIntrinsic>(StoreOp)) {
+ assert(VPStore->getIntrinsicID() == Intrinsic::vp_store);
+ // Require a constant mask.
+ if (!isa<ConstantVector>(VPStore->getArgOperand(2)))
+ return false;
+ StoredValue = VPStore->getArgOperand(0);
+ } else {
+ llvm_unreachable("unsupported store operation");
+ }
- auto *SVI = dyn_cast<ShuffleVectorInst>(SI->getValueOperand());
+ auto *SVI = dyn_cast<ShuffleVectorInst>(StoredValue);
if (!SVI || !SVI->hasOneUse() || isa<ScalableVectorType>(SVI->getType()))
return false;
+ unsigned NumStoredElements =
+ cast<FixedVectorType>(SVI->getType())->getNumElements();
// Check if the shufflevector is RE-interleave shuffle.
unsigned Factor;
if (!isReInterleaveMask(SVI, Factor, MaxFactor))
return false;
+ assert(NumStoredElements % Factor == 0 &&
+ "number of stored element should be a multiple of Factor");
+
+ // Check if the de-interleaved vp.store masks are the same.
+ unsigned LaneMaskLen = NumStoredElements / Factor;
+ SmallVector<Constant *, 8> LaneMask(LaneMaskLen, nullptr);
+ if (auto *VPStore = dyn_cast<VPIntrinsic>(StoreOp)) {
+ if (!isInterleavedConstantMask(
+ Factor, cast<ConstantVector>(VPStore->getArgOperand(2)), LaneMask))
+ return false;
+ }
- LLVM_DEBUG(dbgs() << "IA: Found an interleaved store: " << *SI << "\n");
+ LLVM_DEBUG(dbgs() << "IA: Found an interleaved store: " << *StoreOp << "\n");
- // Try to create target specific intrinsics to replace the store and shuffle.
- if (!TLI->lowerInterleavedStore(SI, SVI, Factor))
- return false;
+ if (auto *VPStore = dyn_cast<VPIntrinsic>(StoreOp)) {
+ IRBuilder<> Builder(VPStore);
+ // We need to effectively de-interleave the shufflemask
+ // because lowerInterleavedVPStore expected individual de-interleaved
+ // values.
+ SmallVector<Value *, 10> NewShuffles;
+ SmallVector<int, 16> NewShuffleMask(LaneMaskLen);
+ auto ShuffleMask = SVI->getShuffleMask();
+
+ for (unsigned i = 0; i < Factor; i++) {
+ for (unsigned j = 0; j < LaneMaskLen; j++)
+ NewShuffleMask[j] = ShuffleMask[i + Factor * j];
+
+ NewShuffles.push_back(Builder.CreateShuffleVector(
+ SVI->getOperand(0), SVI->getOperand(1), NewShuffleMask));
+ }
+
+ // Try to create target specific intrinsics to replace the vp.store and
+ // shuffle.
+ if (!TLI->lowerInterleavedVPStore(VPStore, ConstantVector::get(LaneMask),
+ NewShuffles))
+ // We already created new shuffles.
+ return true;
+ } else {
+ // Try to create target specific intrinsics to replace the store and
+ // shuffle.
+ if (!TLI->lowerInterleavedStore(cast<StoreInst>(StoreOp), SVI, Factor))
+ return false;
+ }
// Already have a new target specific interleaved store. Erase the old store.
- DeadInsts.insert(SI);
+ DeadInsts.insert(StoreOp);
DeadInsts.insert(SVI);
return true;
}
@@ -686,8 +792,7 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
// Since lowerInterleaveLoad expects Shuffles and LoadInst, use special
// TLI function to emit target-specific interleaved instruction.
- if (!TLI->lowerDeinterleavedIntrinsicToVPLoad(VPLoad, Mask,
- DeinterleaveValues))
+ if (!TLI->lowerDeinterleavedVPLoad(VPLoad, Mask, DeinterleaveValues))
return false;
} else {
@@ -739,8 +844,7 @@ bool InterleavedAccessImpl::lowerInterleaveIntrinsic(
// Since lowerInterleavedStore expects Shuffle and StoreInst, use special
// TLI function to emit target-specific interleaved instruction.
- if (!TLI->lowerInterleavedIntrinsicToVPStore(VPStore, Mask,
- InterleaveValues))
+ if (!TLI->lowerInterleavedVPStore(VPStore, Mask, InterleaveValues))
return false;
} else {
auto *SI = cast<StoreInst>(StoredBy);
@@ -766,12 +870,15 @@ bool InterleavedAccessImpl::runOnFunction(Function &F) {
SmallSetVector<Instruction *, 32> DeadInsts;
bool Changed = false;
+ using namespace PatternMatch;
for (auto &I : instructions(F)) {
- if (auto *LI = dyn_cast<LoadInst>(&I))
- Changed |= lowerInterleavedLoad(LI, DeadInsts);
+ if (match(&I, m_CombineOr(m_Load(m_Value()),
+ m_Intrinsic<Intrinsic::vp_load>())))
+ Changed |= lowerInterleavedLoad(&I, DeadInsts);
- if (auto *SI = dyn_cast<StoreInst>(&I))
- Changed |= lowerInterleavedStore(SI, DeadInsts);
+ if (match(&I, m_CombineOr(m_Store(m_Value(), m_Value()),
+ m_Intrinsic<Intrinsic::vp_store>())))
+ Changed |= lowerInterleavedStore(&I, DeadInsts);
if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
// At present, we only have intrinsics to represent (de)interleaving
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index f7d192756fd56..96baa24e9665f 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1724,24 +1724,24 @@ bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
MachineMemOperand::MOVolatile;
return true;
- case Intrinsic::riscv_seg2_load:
- case Intrinsic::riscv_seg3_load:
- case Intrinsic::riscv_seg4_load:
- case Intrinsic::riscv_seg5_load:
- case Intrinsic::riscv_seg6_load:
- case Intrinsic::riscv_seg7_load:
- case Intrinsic::riscv_seg8_load:
+ case Intrinsic::riscv_seg2_load_mask:
+ case Intrinsic::riscv_seg3_load_mask:
+ case Intrinsic::riscv_seg4_load_mask:
+ case Intrinsic::riscv_seg5_load_mask:
+ case Intrinsic::riscv_seg6_load_mask:
+ case Intrinsic::riscv_seg7_load_mask:
+ case Intrinsic::riscv_seg8_load_mask:
return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
/*IsUnitStrided*/ false, /*UsePtrVal*/ true);
- case Intrinsic::riscv_seg2_store:
- case Intrinsic::riscv_seg3_store:
- case Intrinsic::riscv_seg4_store:
- case Intrinsic::riscv_seg5_store:
- case Intrinsic::riscv_seg6_store:
- case Intrinsic::riscv_seg7_store:
- case Intrinsic::riscv_seg8_store:
- // Operands are (vec, ..., vec, ptr, vl)
- return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
+ case Intrinsic::riscv_seg2_store_mask:
+ case Intrinsic::riscv_seg3_store_mask:
+ case Intrinsic::riscv_seg4_store_mask:
+ case Intrinsic::riscv_seg5_store_mask:
+ case Intrinsic::riscv_seg6_store_mask:
+ case Intrinsic::riscv_seg7_store_mask:
+ case Intrinsic::riscv_seg8_store_mask:
+ // Operands are (vec, ..., vec, ptr, mask, vl)
+ return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
/*IsStore*/ true,
/*IsUnitStrided*/ false, /*UsePtrVal*/ true);
case Intrinsic::riscv_vle:
@@ -10444,19 +10444,19 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
switch (IntNo) {
default:
break;
- case Intrinsic::riscv_seg2_load:
- case Intrinsic::riscv_seg3_load:
- case Intrinsic::riscv_seg4_load:
- case Intrinsic::riscv_seg5_load:
- case Intrinsic::riscv_seg6_load:
- case Intrinsic::riscv_seg7_load:
- case Intrinsic::riscv_seg8_load: {
+ case Intrinsic::riscv_seg2_load_mask:
+ case Intrinsic::riscv_seg3_load_mask:
+ case Intrinsic::riscv_seg4_load_mask:
+ case Intrinsic::riscv_seg5_load_mask:
+ case Intrinsic::riscv_seg6_load_mask:
+ case Intrinsic::riscv_seg7_load_mask:
+ case Intrinsic::riscv_seg8_load_mask: {
SDLoc DL(Op);
static const Intrinsic::ID VlsegInts[7] = {
- Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
- Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
- Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
- Intrinsic::riscv_vlseg8};
+ Intrinsic::riscv_vlseg2_mask, Intrinsic::riscv_vlseg3_mask,
+ Intrinsic::riscv_vlseg4_mask, Intrinsic::riscv_vlseg5_mask,
+ Intrinsic::riscv_vlseg6_mask, Intrinsic::riscv_vlseg7_mask,
+ Intrinsic::riscv_vlseg8_mask};
unsigned NF = Op->getNumValues() - 1;
assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
MVT XLenVT = Subtarget.getXLenVT();
@@ -10466,7 +10466,16 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
ContainerVT.getScalarSizeInBits();
EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
- SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
+ // Operands: (chain, int_id, pointer, mask, vl)
+ SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
+ SDValue Mask = Op.getOperand(3);
+ MVT MaskVT = Mask.getSimpleValueType();
+ if (MaskVT.isFixedLengthVector()) {
+ MVT MaskContainerVT =
+ ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget);
+ Mask = convertToScalabl...
[truncated]
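A minimal sketch of the kind of fixed-vector IR the updated InterleavedAccessPass is meant to recognize after this patch: a vp.load with a constant (here all-true) mask whose only users are de-interleaving shufflevectors. The i32 element type, the factor of 2, and the constant EVL of 8 are illustrative assumptions, not values taken from the patch.

```llvm
; Factor-2 interleaved access: a vp.load with a constant all-true mask,
; used only by de-interleaving shuffles. With this patch the pass can hand
; the per-lane mask and the shuffle results to TLI::lowerDeinterleavedVPLoad,
; which on RISC-V can turn this into a masked segment load.
define void @deinterleave2_sketch(ptr %p, ptr %out0, ptr %out1) {
  %wide = call <8 x i32> @llvm.vp.load.v8i32.p0(
              ptr %p,
              <8 x i1> <i1 true, i1 true, i1 true, i1 true,
                        i1 true, i1 true, i1 true, i1 true>,
              i32 8)
  %even = shufflevector <8 x i32> %wide, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %odd  = shufflevector <8 x i32> %wide, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  store <4 x i32> %even, ptr %out0
  store <4 x i32> %odd, ptr %out1
  ret void
}

declare <8 x i32> @llvm.vp.load.v8i32.p0(ptr, <8 x i1>, i32)
```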
You can test this locally with the following command:
git diff -U0 --pickaxe-regex -S '([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)' 'HEAD~1' HEAD llvm/include/llvm/CodeGen/TargetLowering.h llvm/lib/CodeGen/InterleavedAccessPass.cpp llvm/lib/Target/RISCV/RISCVISelLowering.cpp llvm/lib/Target/RISCV/RISCVISelLowering.h llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll llvm/test/CodeGen/RISCV/rvv/fixed-vectors-segN-load.ll llvm/test/CodeGen/RISCV/rvv/fixed-vectors-segN-store.ll llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll llvm/test/Transforms/InterleavedAccess/RISCV/interleaved-accesses.ll llvm/test/Transforms/InterleavedAccess/RISCV/zve32x.ll llvm/test/Transforms/InterleavedAccess/RISCV/zvl32b.ll
The following files introduce new uses of undef:
Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields undef. In tests, avoid using undef and having tests that trigger undefined behavior.
For example, this is considered a bad practice:
define void @fn() {
  ...
  br i1 undef, ...
}
Please use the following instead:
define void @fn(i1 %cond) {
  ...
  br i1 %cond, ...
}
Please refer to the Undefined Behavior Manual for more information.
LGTM. I agree we should do some subtractions at times.
…ants RISCVVectorPeepholePass would replace instructions with all-ones mask with their unmask variant, so there isn't really a point to keep separate versions of intrinsics.
Force-pushed from c4d96dc to 75108c0.
I think we should not call it "deprecate" if you're deleting them. Most of the time when we say we're deprecating something in LLVM, it's still usable for a time with maybe a warning.
Changed the title: "…riscv.segN.load/store in favor of their mask variants" → "[RISCV] Remove riscv.segN.load/store in favor of their mask variants"
Agree. The title is updated now.
RISCVVectorPeepholePass would replace instructions with an all-ones mask with their unmasked variants, so there isn't really a point in keeping separate versions of the intrinsics.
Note that riscv.segN.load/store.mask does not take a pointer type (i.e. address space) as part of its overloading type signature, because RISC-V doesn't really use address spaces other than the default one.
This PR stacks on top of #135445.
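To make the new interface concrete, here is a rough sketch of a call to one of the masked intrinsics; the v8i8 element type, the i64 VL type, and the function name are illustrative assumptions rather than code from this PR. An all-ones mask like the one below is expected to be folded back into an unmasked vlseg2 by RISCVVectorPeephole.

```llvm
; Old (removed by this PR): pointer and VL types were overloaded, no mask operand:
;   call { <8 x i8>, <8 x i8> } @llvm.riscv.seg2.load.v8i8.p0.i64(ptr %p, i64 8)
; New: a plain ptr operand, a per-element i1 mask, then the VL.
define { <8 x i8>, <8 x i8> } @seg2_load_sketch(ptr %p) {
  %v = call { <8 x i8>, <8 x i8> } @llvm.riscv.seg2.load.mask.v8i8.i64(
           ptr %p,
           <8 x i1> <i1 true, i1 true, i1 true, i1 true,
                     i1 true, i1 true, i1 true, i1 true>,
           i64 8)
  ret { <8 x i8>, <8 x i8> } %v
}

declare { <8 x i8>, <8 x i8> } @llvm.riscv.seg2.load.mask.v8i8.i64(ptr, <8 x i1>, i64)
```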