@@ -884,6 +884,63 @@ bool SUnitWithMemInfo::getUnderlyingObjects() {
884
884
return true ;
885
885
}
886
886
887
+ // / Returns true if there is a loop-carried order dependency from \p Src to \p
888
+ // / Dst.
889
+ static bool hasLoopCarriedMemDep (const SUnitWithMemInfo &Src,
890
+ const SUnitWithMemInfo &Dst,
891
+ BatchAAResults &BAA,
892
+ const TargetInstrInfo *TII,
893
+ const TargetRegisterInfo *TRI) {
894
+ if (Src.isTriviallyDisjoint (Dst))
895
+ return false ;
896
+ if (isSuccOrder (Src.SU , Dst.SU ))
897
+ return false ;
898
+
899
+ MachineInstr &SrcMI = *Src.SU ->getInstr ();
900
+ MachineInstr &DstMI = *Dst.SU ->getInstr ();
901
+ // First, perform the cheaper check that compares the base register.
902
+ // If they are the same and the load offset is less than the store
903
+ // offset, then mark the dependence as loop carried potentially.
904
+ const MachineOperand *BaseOp1, *BaseOp2;
905
+ int64_t Offset1, Offset2;
906
+ bool Offset1IsScalable, Offset2IsScalable;
907
+ if (TII->getMemOperandWithOffset (SrcMI, BaseOp1, Offset1, Offset1IsScalable,
908
+ TRI) &&
909
+ TII->getMemOperandWithOffset (DstMI, BaseOp2, Offset2, Offset2IsScalable,
910
+ TRI)) {
911
+ if (BaseOp1->isIdenticalTo (*BaseOp2) &&
912
+ Offset1IsScalable == Offset2IsScalable && (int )Offset1 < (int )Offset2) {
913
+ assert (TII->areMemAccessesTriviallyDisjoint (SrcMI, DstMI) &&
914
+ " What happened to the chain edge?" );
915
+ return true ;
916
+ }
917
+ }
918
+
919
+ // Second, the more expensive check that uses alias analysis on the
920
+ // base registers. If they alias, and the load offset is less than
921
+ // the store offset, the mark the dependence as loop carried.
922
+ if (Src.isUnknown () || Dst.isUnknown ())
923
+ return true ;
924
+ if (Src.MemOpValue == Dst.MemOpValue && Src.MemOpOffset <= Dst.MemOpOffset )
925
+ return true ;
926
+
927
+ if (BAA.isNoAlias (
928
+ MemoryLocation::getBeforeOrAfter (Src.MemOpValue , Src.AATags ),
929
+ MemoryLocation::getBeforeOrAfter (Dst.MemOpValue , Dst.AATags )))
930
+ return false ;
931
+
932
+ // AliasAnalysis sometimes gives up on following the underlying
933
+ // object. In such a case, separate checks for underlying objects may
934
+ // prove that there are no aliases between two accesses.
935
+ for (const Value *SrcObj : Src.UnderlyingObjs )
936
+ for (const Value *DstObj : Dst.UnderlyingObjs )
937
+ if (!BAA.isNoAlias (MemoryLocation::getBeforeOrAfter (SrcObj, Src.AATags ),
938
+ MemoryLocation::getBeforeOrAfter (DstObj, Dst.AATags )))
939
+ return true ;
940
+
941
+ return false ;
942
+ }
943
+
887
944
// / Add a chain edge between a load and store if the store can be an
888
945
// / alias of the load on a subsequent iteration, i.e., a loop carried
889
946
// / dependence. This code is very similar to the code in ScheduleDAGInstrs
@@ -898,76 +955,12 @@ void SwingSchedulerDAG::addLoopCarriedDependences() {
898
955
PendingLoads.emplace_back (&SU);
899
956
} else if (MI.mayStore ()) {
900
957
SUnitWithMemInfo Store (&SU);
901
- for (const SUnitWithMemInfo &Load : PendingLoads) {
902
- if (Load.isTriviallyDisjoint (Store))
903
- continue ;
904
- if (isSuccOrder (Load.SU , Store.SU ))
905
- continue ;
906
- MachineInstr &LdMI = *Load.SU ->getInstr ();
907
- // First, perform the cheaper check that compares the base register.
908
- // If they are the same and the load offset is less than the store
909
- // offset, then mark the dependence as loop carried potentially.
910
- const MachineOperand *BaseOp1, *BaseOp2;
911
- int64_t Offset1, Offset2;
912
- bool Offset1IsScalable, Offset2IsScalable;
913
- if (TII->getMemOperandWithOffset (LdMI, BaseOp1, Offset1,
914
- Offset1IsScalable, TRI) &&
915
- TII->getMemOperandWithOffset (MI, BaseOp2, Offset2,
916
- Offset2IsScalable, TRI)) {
917
- if (BaseOp1->isIdenticalTo (*BaseOp2) &&
918
- Offset1IsScalable == Offset2IsScalable &&
919
- (int )Offset1 < (int )Offset2) {
920
- assert (TII->areMemAccessesTriviallyDisjoint (LdMI, MI) &&
921
- " What happened to the chain edge?" );
922
- SDep Dep (Load.SU , SDep::Barrier);
923
- Dep.setLatency (1 );
924
- SU.addPred (Dep);
925
- continue ;
926
- }
927
- }
928
- // Second, the more expensive check that uses alias analysis on the
929
- // base registers. If they alias, and the load offset is less than
930
- // the store offset, the mark the dependence as loop carried.
931
- if (Load.isUnknown () || Store.isUnknown ()) {
932
- SDep Dep (Load.SU , SDep::Barrier);
933
- Dep.setLatency (1 );
934
- SU.addPred (Dep);
935
- continue ;
936
- }
937
- if (Load.MemOpValue == Store.MemOpValue &&
938
- Load.MemOpOffset <= Store.MemOpOffset ) {
939
- SDep Dep (Load.SU , SDep::Barrier);
940
- Dep.setLatency (1 );
941
- SU.addPred (Dep);
942
- continue ;
943
- }
944
-
945
- bool IsNoAlias = [&] {
946
- if (BAA.isNoAlias (MemoryLocation::getBeforeOrAfter (Load.MemOpValue ,
947
- Load.AATags ),
948
- MemoryLocation::getBeforeOrAfter (Store.MemOpValue ,
949
- Store.AATags )))
950
- return true ;
951
-
952
- // AliasAnalysis sometimes gives up on following the underlying
953
- // object. In such a case, separate checks for underlying objects may
954
- // prove that there are no aliases between two accesses.
955
- for (const Value *LoadObj : Load.UnderlyingObjs )
956
- for (const Value *StoreObj : Store.UnderlyingObjs )
957
- if (!BAA.isNoAlias (
958
- MemoryLocation::getBeforeOrAfter (LoadObj, Load.AATags ),
959
- MemoryLocation::getBeforeOrAfter (StoreObj, Store.AATags )))
960
- return false ;
961
-
962
- return true ;
963
- }();
964
-
965
- if (!IsNoAlias) {
958
+ for (const SUnitWithMemInfo &Load : PendingLoads)
959
+ if (hasLoopCarriedMemDep (Load, Store, BAA, TII, TRI)) {
966
960
SDep Dep (Load.SU , SDep::Barrier);
967
961
Dep.setLatency (1 );
968
962
SU.addPred (Dep);
969
963
}
970
- }
971
964
}
972
965
}
973
966
}
0 commit comments