@@ -250,6 +250,31 @@ static bool isReInterleaveMask(ShuffleVectorInst *SVI, unsigned &Factor,
    return false;
 }
 
+/// Return true if it's a non-all-zeros, interleaving mask. For instance,
+/// 111000111000 is interleaved from three 1010 masks.
+/// \p LaneMask returns the mask of an individual lane.
+static bool isInterleavedConstantMask(unsigned Factor, ConstantVector *Mask,
+                                      SmallVectorImpl<Constant *> &LaneMask) {
+  unsigned LaneMaskLen = LaneMask.size();
+  if (auto *Splat = Mask->getSplatValue()) {
+    // All-zeros mask.
+    if (Splat->isZeroValue())
+      return false;
+    // All-ones mask.
+    std::fill(LaneMask.begin(), LaneMask.end(),
+              ConstantInt::getTrue(Mask->getContext()));
+  } else {
+    for (unsigned Idx = 0U, N = LaneMaskLen * Factor; Idx < N; ++Idx) {
+      Constant *Ref = Mask->getAggregateElement((Idx / Factor) * Factor);
+      if (Ref != Mask->getAggregateElement(Idx))
+        return false;
+      LaneMask[Idx / Factor] = Ref;
+    }
+  }
+
+  return true;
+}
+
 bool InterleavedAccessImpl::lowerInterleavedLoad(
     Instruction *LoadOp, SmallSetVector<Instruction *, 32> &DeadInsts) {
   if (isa<ScalableVectorType>(LoadOp->getType()))
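
For intuition about what the new helper accepts, here is a minimal standalone sketch of the same check over plain booleans. The name isInterleavedMask and the use of std::vector<bool> are illustrative stand-ins for this write-up, not part of the patch:

#include <cassert>
#include <cstdio>
#include <vector>

// Simplified model of isInterleavedConstantMask above: every group of
// Factor consecutive elements must be uniform, and the mask must not be
// all-zeros. On success, Lane receives the per-lane (de-interleaved) mask.
static bool isInterleavedMask(unsigned Factor, const std::vector<bool> &Mask,
                              std::vector<bool> &Lane) {
  Lane.assign(Mask.size() / Factor, false);
  bool AnySet = false;
  for (size_t Idx = 0; Idx < Mask.size(); ++Idx) {
    bool Ref = Mask[(Idx / Factor) * Factor]; // first element of the group
    if (Mask[Idx] != Ref)
      return false; // group is not uniform -> not an interleaved mask
    Lane[Idx / Factor] = Ref;
    AnySet |= Ref;
  }
  return AnySet; // reject the all-zeros mask, like the real helper
}

int main() {
  // 111000111000 = three copies of 1010 interleaved with Factor = 3.
  std::vector<bool> Mask = {1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0}, Lane;
  assert(isInterleavedMask(3, Mask, Lane));
  assert((Lane == std::vector<bool>{1, 0, 1, 0}));
  // A non-uniform leading group (110) is rejected.
  assert(!isInterleavedMask(3, {1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0}, Lane));
  std::puts("ok");
}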
@@ -261,8 +286,7 @@ bool InterleavedAccessImpl::lowerInterleavedLoad(
   } else if (auto *VPLoad = dyn_cast<VPIntrinsic>(LoadOp)) {
     assert(VPLoad->getIntrinsicID() == Intrinsic::vp_load);
-    // Require a constant mask and evl.
-    if (!isa<ConstantVector>(VPLoad->getArgOperand(1)) ||
-        !isa<ConstantInt>(VPLoad->getArgOperand(2)))
+    // Require a constant mask.
+    if (!isa<ConstantVector>(VPLoad->getArgOperand(1)))
       return false;
   } else {
     llvm_unreachable("unsupported load operation");
@@ -315,24 +339,6 @@ bool InterleavedAccessImpl::lowerInterleavedLoad(
                            NumLoadElements))
     return false;
 
-  // If this is a vp.load, record its mask (NOT shuffle mask).
-  BitVector MaskedIndices(NumLoadElements);
-  if (auto *VPLoad = dyn_cast<VPIntrinsic>(LoadOp)) {
-    auto *Mask = cast<ConstantVector>(VPLoad->getArgOperand(1));
-    assert(cast<FixedVectorType>(Mask->getType())->getNumElements() ==
-           NumLoadElements);
-    if (auto *Splat = Mask->getSplatValue()) {
-      // All-zeros mask, bail out early.
-      if (Splat->isZeroValue())
-        return false;
-    } else {
-      for (unsigned i = 0U; i < NumLoadElements; ++i) {
-        if (Mask->getAggregateElement(i)->isZeroValue())
-          MaskedIndices.set(i);
-      }
-    }
-  }
-
   // Holds the corresponding index for each DE-interleave shuffle.
   SmallVector<unsigned, 4> Indices;
 
@@ -373,48 +379,35 @@ bool InterleavedAccessImpl::lowerInterleavedLoad(
   bool BinOpShuffleChanged =
       replaceBinOpShuffles(BinOpShuffles.getArrayRef(), Shuffles, LoadOp);
 
-  // Check if we extract only the unmasked elements.
-  if (MaskedIndices.any()) {
-    if (any_of(Shuffles, [&](const auto *Shuffle) {
-          ArrayRef<int> ShuffleMask = Shuffle->getShuffleMask();
-          for (int Idx : ShuffleMask) {
-            if (Idx < 0)
-              continue;
-            if (MaskedIndices.test(unsigned(Idx)))
-              return true;
-          }
-          return false;
-        })) {
-      LLVM_DEBUG(dbgs() << "IA: trying to extract a masked element through "
-                        << "shufflevector\n");
-      return false;
-    }
-  }
-  // Check if we extract only the elements within evl.
+  // Check if the de-interleaved vp.load masks are the same.
+  unsigned ShuffleMaskLen = Shuffles[0]->getShuffleMask().size();
+  SmallVector<Constant *, 8> LaneMask(ShuffleMaskLen, nullptr);
   if (auto *VPLoad = dyn_cast<VPIntrinsic>(LoadOp)) {
-    uint64_t EVL = cast<ConstantInt>(VPLoad->getArgOperand(2))->getZExtValue();
-    if (any_of(Shuffles, [&](const auto *Shuffle) {
-          ArrayRef<int> ShuffleMask = Shuffle->getShuffleMask();
-          for (int Idx : ShuffleMask) {
-            if (Idx < 0)
-              continue;
-            if (unsigned(Idx) >= EVL)
-              return true;
-          }
-          return false;
-        })) {
-      LLVM_DEBUG(
-          dbgs() << "IA: trying to extract an element out of EVL range\n");
+    if (!isInterleavedConstantMask(
+            Factor, cast<ConstantVector>(VPLoad->getArgOperand(1)), LaneMask))
       return false;
-    }
   }
 
   LLVM_DEBUG(dbgs() << "IA: Found an interleaved load: " << *LoadOp << "\n");
 
-  // Try to create target specific intrinsics to replace the load and shuffles.
-  if (!TLI->lowerInterleavedLoad(LoadOp, Shuffles, Indices, Factor)) {
-    // If Extracts is not empty, tryReplaceExtracts made changes earlier.
-    return !Extracts.empty() || BinOpShuffleChanged;
+  if (auto *VPLoad = dyn_cast<VPIntrinsic>(LoadOp)) {
+    auto *MaskVec = ConstantVector::get(LaneMask);
+    // Sometimes the number of Shuffles might be less than Factor; we have to
+    // fill the gaps with null. Also, lowerDeinterleavedVPLoad expects them
+    // to be sorted.
+    SmallVector<Value *, 4> ShuffleValues(Factor, nullptr);
+    for (auto [Idx, ShuffleMaskIdx] : enumerate(Indices))
+      ShuffleValues[ShuffleMaskIdx] = Shuffles[Idx];
+    if (!TLI->lowerDeinterleavedVPLoad(VPLoad, MaskVec, ShuffleValues))
+      // If Extracts is not empty, tryReplaceExtracts made changes earlier.
+      return !Extracts.empty() || BinOpShuffleChanged;
+  } else {
+    // Try to create target specific intrinsics to replace the load and
+    // shuffles.
+    if (!TLI->lowerInterleavedLoad(cast<LoadInst>(LoadOp), Shuffles, Indices,
+                                   Factor))
+      // If Extracts is not empty, tryReplaceExtracts made changes earlier.
+      return !Extracts.empty() || BinOpShuffleChanged;
   }
 
   DeadInsts.insert_range(Shuffles);
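
The gap-filling loop above can be hard to picture. Below is a toy model of the placement, with std::string values standing in for the shufflevector instructions and an assumed Factor of 4 where only the lane-2 and lane-0 shuffles survived:

#include <cassert>
#include <string>
#include <vector>

int main() {
  // Mirrors the ShuffleValues construction above: Indices[Idx] names the
  // de-interleaved lane that Shuffles[Idx] extracts, so each shuffle is
  // placed at its lane position and missing lanes stay null (here "").
  const unsigned Factor = 4;
  std::vector<std::string> Shuffles = {"shuf_lane2", "shuf_lane0"};
  std::vector<unsigned> Indices = {2, 0};

  std::vector<std::string> ShuffleValues(Factor); // "" plays the role of null
  for (size_t Idx = 0; Idx < Indices.size(); ++Idx)
    ShuffleValues[Indices[Idx]] = Shuffles[Idx];

  // Sorted by lane, with gaps for the lanes that had no extracting shuffle.
  assert((ShuffleValues ==
          std::vector<std::string>{"shuf_lane0", "", "shuf_lane2", ""}));
}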
@@ -530,9 +523,8 @@ bool InterleavedAccessImpl::lowerInterleavedStore(
     StoredValue = SI->getValueOperand();
   } else if (auto *VPStore = dyn_cast<VPIntrinsic>(StoreOp)) {
     assert(VPStore->getIntrinsicID() == Intrinsic::vp_store);
-    // Require a constant mask and evl.
-    if (!isa<ConstantVector>(VPStore->getArgOperand(2)) ||
-        !isa<ConstantInt>(VPStore->getArgOperand(3)))
+    // Require a constant mask.
+    if (!isa<ConstantVector>(VPStore->getArgOperand(2)))
       return false;
     StoredValue = VPStore->getArgOperand(0);
   } else {
@@ -545,53 +537,53 @@ bool InterleavedAccessImpl::lowerInterleavedStore(
 
   unsigned NumStoredElements =
       cast<FixedVectorType>(SVI->getType())->getNumElements();
-  // If this is a vp.store, record its mask (NOT shuffle mask).
-  BitVector MaskedIndices(NumStoredElements);
-  if (auto *VPStore = dyn_cast<VPIntrinsic>(StoreOp)) {
-    auto *Mask = cast<ConstantVector>(VPStore->getArgOperand(2));
-    assert(cast<FixedVectorType>(Mask->getType())->getNumElements() ==
-           NumStoredElements);
-    if (auto *Splat = Mask->getSplatValue()) {
-      // All-zeros mask, bail out early.
-      if (Splat->isZeroValue())
-        return false;
-    } else {
-      for (unsigned i = 0U; i < NumStoredElements; ++i) {
-        if (Mask->getAggregateElement(i)->isZeroValue())
-          MaskedIndices.set(i);
-      }
-    }
-  }
-
   // Check if the shufflevector is a RE-interleave shuffle.
   unsigned Factor;
   if (!isReInterleaveMask(SVI, Factor, MaxFactor))
     return false;
+  assert(NumStoredElements % Factor == 0 &&
+         "number of stored elements should be a multiple of Factor");
 
-  // Check if we store only the unmasked elements.
-  if (MaskedIndices.any()) {
-    if (any_of(SVI->getShuffleMask(), [&](int Idx) {
-          return Idx >= 0 && MaskedIndices.test(unsigned(Idx));
-        })) {
-      LLVM_DEBUG(dbgs() << "IA: trying to store a masked element\n");
-      return false;
-    }
-  }
-  // Check if we store only the elements within evl.
+  // Check if the de-interleaved vp.store masks are the same.
+  unsigned LaneMaskLen = NumStoredElements / Factor;
+  SmallVector<Constant *, 8> LaneMask(LaneMaskLen, nullptr);
   if (auto *VPStore = dyn_cast<VPIntrinsic>(StoreOp)) {
-    uint64_t EVL = cast<ConstantInt>(VPStore->getArgOperand(3))->getZExtValue();
-    if (any_of(SVI->getShuffleMask(),
-               [&](int Idx) { return Idx >= 0 && unsigned(Idx) >= EVL; })) {
-      LLVM_DEBUG(dbgs() << "IA: trying to store an element out of EVL range\n");
+    if (!isInterleavedConstantMask(
+            Factor, cast<ConstantVector>(VPStore->getArgOperand(2)), LaneMask))
       return false;
-    }
   }
 
   LLVM_DEBUG(dbgs() << "IA: Found an interleaved store: " << *StoreOp << "\n");
 
-  // Try to create target specific intrinsics to replace the store and shuffle.
-  if (!TLI->lowerInterleavedStore(StoreOp, SVI, Factor))
-    return false;
+  if (auto *VPStore = dyn_cast<VPIntrinsic>(StoreOp)) {
+    IRBuilder<> Builder(VPStore);
+    // We need to effectively de-interleave the shuffle mask, because
+    // lowerInterleavedVPStore expects the individual de-interleaved
+    // values.
+    SmallVector<Value *, 10> NewShuffles;
+    SmallVector<int, 16> NewShuffleMask(LaneMaskLen);
+    auto ShuffleMask = SVI->getShuffleMask();
+
+    for (unsigned i = 0; i < Factor; i++) {
+      for (unsigned j = 0; j < LaneMaskLen; j++)
+        NewShuffleMask[j] = ShuffleMask[i + Factor * j];
+
+      NewShuffles.push_back(Builder.CreateShuffleVector(
+          SVI->getOperand(0), SVI->getOperand(1), NewShuffleMask));
+    }
+
+    // Try to create target specific intrinsics to replace the vp.store and
+    // shuffle.
+    if (!TLI->lowerInterleavedVPStore(VPStore, ConstantVector::get(LaneMask),
+                                      NewShuffles))
+      // We already created new shuffles.
+      return true;
+  } else {
+    // Try to create target specific intrinsics to replace the store and
+    // shuffle.
+    if (!TLI->lowerInterleavedStore(cast<StoreInst>(StoreOp), SVI, Factor))
+      return false;
+  }
 
   // Already have a new target specific interleaved store. Erase the old store.
   DeadInsts.insert(StoreOp);
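
The index math ShuffleMask[i + Factor * j] in the new vp.store path factors one RE-interleave mask into Factor per-lane masks. A toy demonstration with an assumed Factor of 3 and LaneMaskLen of 4 (the mask values are made up for illustration):

#include <cassert>
#include <vector>

int main() {
  // A Factor-3 RE-interleave mask: output group i holds element j of each
  // of the three 4-element fields stored at source offsets 0, 4, and 8.
  const unsigned Factor = 3, LaneMaskLen = 4;
  std::vector<int> ShuffleMask = {0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11};

  std::vector<std::vector<int>> PerLane;
  for (unsigned i = 0; i < Factor; i++) {
    // Same indexing as the loop in the patch above.
    std::vector<int> NewShuffleMask(LaneMaskLen);
    for (unsigned j = 0; j < LaneMaskLen; j++)
      NewShuffleMask[j] = ShuffleMask[i + Factor * j];
    PerLane.push_back(NewShuffleMask);
  }

  // Each factored mask now selects one contiguous field: 0..3, 4..7, 8..11,
  // i.e. exactly the de-interleaved values lowerInterleavedVPStore expects.
  assert((PerLane[0] == std::vector<int>{0, 1, 2, 3}));
  assert((PerLane[1] == std::vector<int>{4, 5, 6, 7}));
  assert((PerLane[2] == std::vector<int>{8, 9, 10, 11}));
}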
@@ -806,8 +798,7 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
 
     // Since lowerInterleavedLoad expects Shuffles and LoadInst, use special
     // TLI function to emit target-specific interleaved instruction.
-    if (!TLI->lowerDeinterleavedIntrinsicToVPLoad(VPLoad, Mask,
-                                                  DeinterleaveValues))
+    if (!TLI->lowerDeinterleavedVPLoad(VPLoad, Mask, DeinterleaveValues))
       return false;
 
   } else {
@@ -859,8 +850,7 @@ bool InterleavedAccessImpl::lowerInterleaveIntrinsic(
 
     // Since lowerInterleavedStore expects Shuffle and StoreInst, use special
     // TLI function to emit target-specific interleaved instruction.
-    if (!TLI->lowerInterleavedIntrinsicToVPStore(VPStore, Mask,
-                                                 InterleaveValues))
+    if (!TLI->lowerInterleavedVPStore(VPStore, Mask, InterleaveValues))
       return false;
   } else {
     auto *SI = cast<StoreInst>(StoredBy);