@@ -237,6 +237,37 @@ INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
237
237
INITIALIZE_PASS_END(MachinePipeliner, DEBUG_TYPE,
238
238
" Modulo Software Pipelining" , false , false )
239
239
240
+ namespace {
241
+
242
+ // / This class holds an SUnit corresponding to a memory operation and other
243
+ // / information related to the instruction.
244
+ struct SUnitWithMemInfo {
245
+ SUnit *SU;
246
+ SmallVector<const Value *, 2 > UnderlyingObjs;
247
+
248
+ // / The value of a memory operand.
249
+ const Value *MemOpValue = nullptr ;
250
+
251
+ // / The offset of a memory operand.
252
+ int64_t MemOpOffset = 0 ;
253
+
254
+ AAMDNodes AATags;
255
+
256
+ // / True if all the underlying objects are identified.
257
+ bool IsAllIdentified = false ;
258
+
259
+ SUnitWithMemInfo (SUnit *SU);
260
+
261
+ bool isTriviallyDisjoint (const SUnitWithMemInfo &Other) const ;
262
+
263
+ bool isUnknown () const { return MemOpValue == nullptr ; }
264
+
265
+ private:
266
+ bool getUnderlyingObjects ();
267
+ };
268
+
269
+ } // end anonymous namespace
270
+
240
271
// / The "main" function for implementing Swing Modulo Scheduling.
241
272
bool MachinePipeliner::runOnMachineFunction (MachineFunction &mf) {
242
273
if (skipFunction (mf.getFunction ()))
@@ -470,9 +501,10 @@ void MachinePipeliner::preprocessPhiNodes(MachineBasicBlock &B) {
470
501
bool MachinePipeliner::swingModuloScheduler (MachineLoop &L) {
471
502
assert (L.getBlocks ().size () == 1 && " SMS works on single blocks only." );
472
503
504
+ AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults ();
473
505
SwingSchedulerDAG SMS (
474
506
*this , L, getAnalysis<LiveIntervalsWrapperPass>().getLIS (), RegClassInfo,
475
- II_setByPragma, LI.LoopPipelinerInfo .get ());
507
+ II_setByPragma, LI.LoopPipelinerInfo .get (), AA );
476
508
477
509
MachineBasicBlock *MBB = L.getHeader ();
478
510
// The kernel should not include any terminator instructions. These
@@ -560,9 +592,8 @@ void SwingSchedulerDAG::setMAX_II() {
560
592
// / We override the schedule function in ScheduleDAGInstrs to implement the
561
593
// / scheduling part of the Swing Modulo Scheduling algorithm.
562
594
void SwingSchedulerDAG::schedule () {
563
- AliasAnalysis *AA = &Pass.getAnalysis <AAResultsWrapperPass>().getAAResults ();
564
595
buildSchedGraph (AA);
565
- addLoopCarriedDependences (AA );
596
+ addLoopCarriedDependences ();
566
597
updatePhiDependences ();
567
598
Topo.InitDAGTopologicalSorting ();
568
599
changeDependences ();
@@ -810,113 +841,131 @@ static bool isDependenceBarrier(MachineInstr &MI) {
810
841
(!MI.mayLoad () || !MI.isDereferenceableInvariantLoad ()));
811
842
}
812
843
813
- // / Return the underlying objects for the memory references of an instruction.
844
+ SUnitWithMemInfo::SUnitWithMemInfo (SUnit *SU) : SU(SU) {
845
+ if (!getUnderlyingObjects ())
846
+ return ;
847
+ for (const Value *Obj : UnderlyingObjs)
848
+ if (!isIdentifiedObject (Obj)) {
849
+ IsAllIdentified = false ;
850
+ break ;
851
+ }
852
+ }
853
+
854
+ bool SUnitWithMemInfo::isTriviallyDisjoint (
855
+ const SUnitWithMemInfo &Other) const {
856
+ // If all underlying objects are identified objects and there is no overlap
857
+ // between them, then these two instructions are disjoint.
858
+ if (!IsAllIdentified || !Other.IsAllIdentified )
859
+ return false ;
860
+ for (const Value *Obj : UnderlyingObjs)
861
+ if (llvm::is_contained (Other.UnderlyingObjs , Obj))
862
+ return false ;
863
+ return true ;
864
+ }
865
+
866
+ // / Collect the underlying objects for the memory references of an instruction.
814
867
// / This function calls the code in ValueTracking, but first checks that the
815
868
// / instruction has a memory operand.
816
- static void getUnderlyingObjects (const MachineInstr *MI,
817
- SmallVectorImpl<const Value *> &Objs) {
869
+ // / Returns false if we cannot find the underlying objects.
870
+ bool SUnitWithMemInfo::getUnderlyingObjects () {
871
+ const MachineInstr *MI = SU->getInstr ();
818
872
if (!MI->hasOneMemOperand ())
819
- return ;
873
+ return false ;
820
874
MachineMemOperand *MM = *MI->memoperands_begin ();
821
875
if (!MM->getValue ())
822
- return ;
823
- getUnderlyingObjects (MM->getValue (), Objs);
824
- for (const Value *V : Objs) {
825
- if (!isIdentifiedObject (V)) {
826
- Objs.clear ();
827
- return ;
828
- }
829
- }
876
+ return false ;
877
+ MemOpValue = MM->getValue ();
878
+ MemOpOffset = MM->getOffset ();
879
+ llvm::getUnderlyingObjects (MemOpValue, UnderlyingObjs);
880
+
881
+ // TODO: A no alias scope may be valid only in a single iteration. In this
882
+ // case we need to peel off it like LoopAccessAnalysis does.
883
+ AATags = MM->getAAInfo ();
884
+ return true ;
830
885
}
831
886
832
887
// / Add a chain edge between a load and store if the store can be an
833
888
// / alias of the load on a subsequent iteration, i.e., a loop carried
834
889
// / dependence. This code is very similar to the code in ScheduleDAGInstrs
835
890
// / but that code doesn't create loop carried dependences.
836
- void SwingSchedulerDAG::addLoopCarriedDependences (AliasAnalysis *AA) {
837
- MapVector<const Value *, SmallVector<SUnit *, 4 >> PendingLoads;
838
- Value *UnknownValue =
839
- UndefValue::get (Type::getVoidTy (MF.getFunction ().getContext ()));
891
+ void SwingSchedulerDAG::addLoopCarriedDependences () {
892
+ SmallVector<SUnitWithMemInfo, 4 > PendingLoads;
840
893
for (auto &SU : SUnits) {
841
894
MachineInstr &MI = *SU.getInstr ();
842
895
if (isDependenceBarrier (MI))
843
896
PendingLoads.clear ();
844
897
else if (MI.mayLoad ()) {
845
- SmallVector<const Value *, 4 > Objs;
846
- ::getUnderlyingObjects (&MI, Objs);
847
- if (Objs.empty ())
848
- Objs.push_back (UnknownValue);
849
- for (const auto *V : Objs) {
850
- SmallVector<SUnit *, 4 > &SUs = PendingLoads[V];
851
- SUs.push_back (&SU);
852
- }
898
+ PendingLoads.emplace_back (&SU);
853
899
} else if (MI.mayStore ()) {
854
- SmallVector<const Value *, 4 > Objs;
855
- ::getUnderlyingObjects (&MI, Objs);
856
- if (Objs.empty ())
857
- Objs.push_back (UnknownValue);
858
- for (const auto *V : Objs) {
859
- MapVector<const Value *, SmallVector<SUnit *, 4 >>::iterator I =
860
- PendingLoads.find (V);
861
- if (I == PendingLoads.end ())
900
+ SUnitWithMemInfo Store (&SU);
901
+ for (const SUnitWithMemInfo &Load : PendingLoads) {
902
+ if (Load.isTriviallyDisjoint (Store))
862
903
continue ;
863
- for (auto *Load : I->second ) {
864
- if (isSuccOrder (Load, &SU))
865
- continue ;
866
- MachineInstr &LdMI = *Load->getInstr ();
867
- // First, perform the cheaper check that compares the base register.
868
- // If they are the same and the load offset is less than the store
869
- // offset, then mark the dependence as loop carried potentially.
870
- const MachineOperand *BaseOp1, *BaseOp2;
871
- int64_t Offset1, Offset2;
872
- bool Offset1IsScalable, Offset2IsScalable;
873
- if (TII->getMemOperandWithOffset (LdMI, BaseOp1, Offset1,
874
- Offset1IsScalable, TRI) &&
875
- TII->getMemOperandWithOffset (MI, BaseOp2, Offset2,
876
- Offset2IsScalable, TRI)) {
877
- if (BaseOp1->isIdenticalTo (*BaseOp2) &&
878
- Offset1IsScalable == Offset2IsScalable &&
879
- (int )Offset1 < (int )Offset2) {
880
- assert (TII->areMemAccessesTriviallyDisjoint (LdMI, MI) &&
881
- " What happened to the chain edge?" );
882
- SDep Dep (Load, SDep::Barrier);
883
- Dep.setLatency (1 );
884
- SU.addPred (Dep);
885
- continue ;
886
- }
887
- }
888
- // Second, the more expensive check that uses alias analysis on the
889
- // base registers. If they alias, and the load offset is less than
890
- // the store offset, the mark the dependence as loop carried.
891
- if (!AA) {
892
- SDep Dep (Load, SDep::Barrier);
893
- Dep.setLatency (1 );
894
- SU.addPred (Dep);
895
- continue ;
896
- }
897
- MachineMemOperand *MMO1 = *LdMI.memoperands_begin ();
898
- MachineMemOperand *MMO2 = *MI.memoperands_begin ();
899
- if (!MMO1->getValue () || !MMO2->getValue ()) {
900
- SDep Dep (Load, SDep::Barrier);
901
- Dep.setLatency (1 );
902
- SU.addPred (Dep);
903
- continue ;
904
- }
905
- if (MMO1->getValue () == MMO2->getValue () &&
906
- MMO1->getOffset () <= MMO2->getOffset ()) {
907
- SDep Dep (Load, SDep::Barrier);
904
+ if (isSuccOrder (Load.SU , Store.SU ))
905
+ continue ;
906
+ MachineInstr &LdMI = *Load.SU ->getInstr ();
907
+ // First, perform the cheaper check that compares the base register.
908
+ // If they are the same and the load offset is less than the store
909
+ // offset, then mark the dependence as loop carried potentially.
910
+ const MachineOperand *BaseOp1, *BaseOp2;
911
+ int64_t Offset1, Offset2;
912
+ bool Offset1IsScalable, Offset2IsScalable;
913
+ if (TII->getMemOperandWithOffset (LdMI, BaseOp1, Offset1,
914
+ Offset1IsScalable, TRI) &&
915
+ TII->getMemOperandWithOffset (MI, BaseOp2, Offset2,
916
+ Offset2IsScalable, TRI)) {
917
+ if (BaseOp1->isIdenticalTo (*BaseOp2) &&
918
+ Offset1IsScalable == Offset2IsScalable &&
919
+ (int )Offset1 < (int )Offset2) {
920
+ assert (TII->areMemAccessesTriviallyDisjoint (LdMI, MI) &&
921
+ " What happened to the chain edge?" );
922
+ SDep Dep (Load.SU , SDep::Barrier);
908
923
Dep.setLatency (1 );
909
924
SU.addPred (Dep);
910
925
continue ;
911
926
}
912
- if (!AA->isNoAlias (
913
- MemoryLocation::getAfter (MMO1->getValue (), MMO1->getAAInfo ()),
914
- MemoryLocation::getAfter (MMO2->getValue (),
915
- MMO2->getAAInfo ()))) {
916
- SDep Dep (Load, SDep::Barrier);
917
- Dep.setLatency (1 );
918
- SU.addPred (Dep);
919
- }
927
+ }
928
+ // Second, the more expensive check that uses alias analysis on the
929
+ // base registers. If they alias, and the load offset is less than
930
+ // the store offset, then mark the dependence as loop carried.
931
+ if (Load.isUnknown () || Store.isUnknown ()) {
932
+ SDep Dep (Load.SU , SDep::Barrier);
933
+ Dep.setLatency (1 );
934
+ SU.addPred (Dep);
935
+ continue ;
936
+ }
937
+ if (Load.MemOpValue == Store.MemOpValue &&
938
+ Load.MemOpOffset <= Store.MemOpOffset ) {
939
+ SDep Dep (Load.SU , SDep::Barrier);
940
+ Dep.setLatency (1 );
941
+ SU.addPred (Dep);
942
+ continue ;
943
+ }
944
+
945
+ bool IsNoAlias = [&] {
946
+ if (BAA.isNoAlias (MemoryLocation::getBeforeOrAfter (Load.MemOpValue ,
947
+ Load.AATags ),
948
+ MemoryLocation::getBeforeOrAfter (Store.MemOpValue ,
949
+ Store.AATags )))
950
+ return true ;
951
+
952
+ // AliasAnalysis sometimes gives up on following the underlying
953
+ // object. In such a case, separate checks for underlying objects may
954
+ // prove that there are no aliases between two accesses.
955
+ for (const Value *LoadObj : Load.UnderlyingObjs )
956
+ for (const Value *StoreObj : Store.UnderlyingObjs )
957
+ if (!BAA.isNoAlias (
958
+ MemoryLocation::getBeforeOrAfter (LoadObj, Load.AATags ),
959
+ MemoryLocation::getBeforeOrAfter (StoreObj, Store.AATags )))
960
+ return false ;
961
+
962
+ return true ;
963
+ }();
964
+
965
+ if (!IsNoAlias) {
966
+ SDep Dep (Load.SU , SDep::Barrier);
967
+ Dep.setLatency (1 );
968
+ SU.addPred (Dep);
920
969
}
921
970
}
922
971
}
0 commit comments