-
Notifications
You must be signed in to change notification settings - Fork 13.4k
[DirectX] legalize memset #136244
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[DirectX] legalize memset #136244
Conversation
@llvm/pr-subscribers-backend-directx Author: Farzon Lotfi (farzonl) Changesfixes #136243 This change converts memset into a series of geps and stores It is intentionally limited to memsets of fixed size It also converts the byte stores to type stores. Full diff: https://github.com/llvm/llvm-project/pull/136244.diff 4 Files Affected:
diff --git a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
index b62ff4c52f70c..d55a84d0126ec 100644
--- a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
+++ b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
@@ -12,6 +12,7 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <functional>
@@ -151,6 +152,82 @@ downcastI64toI32InsertExtractElements(Instruction &I,
}
}
+void emitMemset(IRBuilder<> &Builder, Value *Dst, Value *Val,
+ ConstantInt *SizeCI) {
+ LLVMContext &Ctx = Builder.getContext();
+ [[maybe_unused]] DataLayout DL =
+ Builder.GetInsertBlock()->getModule()->getDataLayout();
+ [[maybe_unused]] uint64_t OrigSize = SizeCI->getZExtValue();
+
+ AllocaInst *Alloca = dyn_cast<AllocaInst>(Dst);
+
+ assert(Alloca && "Expected memset on an Alloca");
+ assert(OrigSize == Alloca->getAllocationSize(DL)->getFixedValue() &&
+ "Expected for memset size to match DataLayout size");
+
+ Type *AllocatedTy = Alloca->getAllocatedType();
+ ArrayType *ArrTy = dyn_cast<ArrayType>(AllocatedTy);
+ assert(ArrTy && "Expected Alloca for an Array Type");
+
+ Type *ElemTy = ArrTy->getElementType();
+ uint64_t Size = ArrTy->getArrayNumElements();
+
+ [[maybe_unused]] uint64_t ElemSize = DL.getTypeStoreSize(ElemTy);
+
+ assert(ElemSize > 0 && "Size must be set");
+ assert(OrigSize == ElemSize * Size && "Size in bytes must match");
+
+ Value *TypedVal = Val;
+ if (Val->getType() != ElemTy)
+ TypedVal = Builder.CreateIntCast(Val, ElemTy,
+ false); // Or use CreateBitCast for float
+
+ for (uint64_t I = 0; I < Size; ++I) {
+ Value *Offset = ConstantInt::get(Type::getInt32Ty(Ctx), I);
+ Value *Ptr = Builder.CreateGEP(ElemTy, Dst, Offset, "gep");
+ Builder.CreateStore(TypedVal, Ptr);
+ }
+}
+
+void removeLifetimesForMemset(CallInst *Memset,
+ SmallVectorImpl<Instruction *> &ToRemove) {
+ assert(Memset->getCalledFunction()->getIntrinsicID() == Intrinsic::memset &&
+ "Expected a memset intrinsic");
+
+ Value *DstPtr = Memset->getArgOperand(0);
+ DstPtr = DstPtr->stripPointerCasts();
+
+ for (User *U : DstPtr->users()) {
+ if (auto *CI = dyn_cast<CallInst>(U)) {
+ switch (CI->getIntrinsicID()) {
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ ToRemove.push_back(CI);
+ break;
+ }
+ }
+ }
+}
+
+static void removeMemSet(Instruction &I,
+ SmallVectorImpl<Instruction *> &ToRemove,
+ DenseMap<Value *, Value *>) {
+ if (CallInst *CI = dyn_cast<CallInst>(&I)) {
+ Intrinsic::ID ID = CI->getIntrinsicID();
+ if (ID == Intrinsic::memset) {
+ IRBuilder<> Builder(&I);
+ Value *Dst = CI->getArgOperand(0);
+ Value *Val = CI->getArgOperand(1);
+ [[maybe_unused]] ConstantInt *Size =
+ dyn_cast<ConstantInt>(CI->getArgOperand(2));
+ assert(Size && "Expected Size to be a ConstantInt");
+ emitMemset(Builder, Dst, Val, Size);
+ removeLifetimesForMemset(CI, ToRemove);
+ ToRemove.push_back(CI);
+ }
+ }
+}
+
namespace {
class DXILLegalizationPipeline {
@@ -181,6 +258,7 @@ class DXILLegalizationPipeline {
LegalizationPipeline.push_back(fixI8TruncUseChain);
LegalizationPipeline.push_back(downcastI64toI32InsertExtractElements);
LegalizationPipeline.push_back(legalizeFreeze);
+ LegalizationPipeline.push_back(removeMemSet);
}
};
diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
index 41f6f37a41f9d..1cd21ae0fd55a 100644
--- a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
+++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
@@ -96,7 +96,6 @@ class DirectXPassConfig : public TargetPassConfig {
FunctionPass *createTargetRegisterAllocator(bool) override { return nullptr; }
void addCodeGenPrepare() override {
- addPass(createDXILFinalizeLinkageLegacyPass());
addPass(createDXILIntrinsicExpansionLegacyPass());
addPass(createDXILCBufferAccessLegacyPass());
addPass(createDXILDataScalarizationLegacyPass());
@@ -106,6 +105,7 @@ class DirectXPassConfig : public TargetPassConfig {
DxilScalarOptions.ScalarizeLoadStore = true;
addPass(createScalarizerPass(DxilScalarOptions));
addPass(createDXILLegalizeLegacyPass());
+ addPass(createDXILFinalizeLinkageLegacyPass());
addPass(createDXILTranslateMetadataLegacyPass());
addPass(createDXILOpLoweringLegacyPass());
addPass(createDXILPrepareModulePass());
diff --git a/llvm/test/CodeGen/DirectX/legalize-memset.ll b/llvm/test/CodeGen/DirectX/legalize-memset.ll
new file mode 100644
index 0000000000000..2d9b28753ef3d
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/legalize-memset.ll
@@ -0,0 +1,81 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes='dxil-legalize' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+
+
+define void @replace_float_memset_test() {
+; CHECK-LABEL: define void @replace_float_memset_test() {
+; CHECK-NEXT: [[ACCUM_I_FLAT:%.*]] = alloca [2 x float], align 4
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr float, ptr [[ACCUM_I_FLAT]], i32 0
+; CHECK-NEXT: store float 0.000000e+00, ptr [[GEP]], align 4
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr float, ptr [[ACCUM_I_FLAT]], i32 1
+; CHECK-NEXT: store float 0.000000e+00, ptr [[GEP1]], align 4
+; CHECK-NEXT: ret void
+;
+ %accum.i.flat = alloca [2 x float], align 4
+ call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %accum.i.flat)
+ call void @llvm.memset.p0.i32(ptr nonnull align 4 dereferenceable(8) %accum.i.flat, i8 0, i32 8, i1 false)
+ call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %accum.i.flat)
+ ret void
+}
+
+define void @replace_half_memset_test() {
+; CHECK-LABEL: define void @replace_half_memset_test() {
+; CHECK-NEXT: [[ACCUM_I_FLAT:%.*]] = alloca [2 x half], align 4
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr half, ptr [[ACCUM_I_FLAT]], i32 0
+; CHECK-NEXT: store half 0xH0000, ptr [[GEP]], align 2
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr half, ptr [[ACCUM_I_FLAT]], i32 1
+; CHECK-NEXT: store half 0xH0000, ptr [[GEP1]], align 2
+; CHECK-NEXT: ret void
+;
+ %accum.i.flat = alloca [2 x half], align 4
+ call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %accum.i.flat)
+ call void @llvm.memset.p0.i32(ptr nonnull align 4 dereferenceable(8) %accum.i.flat, i8 0, i32 4, i1 false)
+ call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %accum.i.flat)
+ ret void
+}
+
+define void @replace_double_memset_test() {
+; CHECK-LABEL: define void @replace_double_memset_test() {
+; CHECK-NEXT: [[ACCUM_I_FLAT:%.*]] = alloca [2 x double], align 4
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr double, ptr [[ACCUM_I_FLAT]], i32 0
+; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP]], align 8
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr double, ptr [[ACCUM_I_FLAT]], i32 1
+; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP1]], align 8
+; CHECK-NEXT: ret void
+;
+ %accum.i.flat = alloca [2 x double], align 4
+ call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %accum.i.flat)
+ call void @llvm.memset.p0.i32(ptr nonnull align 4 dereferenceable(8) %accum.i.flat, i8 0, i32 16, i1 false)
+ call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %accum.i.flat)
+ ret void
+}
+
+define void @replace_int16_memset_test() {
+; CHECK-LABEL: define void @replace_int16_memset_test() {
+; CHECK-NEXT: [[CACHE_I:%.*]] = alloca [2 x i16], align 2
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[CACHE_I]], i32 0
+; CHECK-NEXT: store i16 0, ptr [[GEP]], align 2
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i16, ptr [[CACHE_I]], i32 1
+; CHECK-NEXT: store i16 0, ptr [[GEP1]], align 2
+; CHECK-NEXT: ret void
+;
+ %cache.i = alloca [2 x i16], align 2
+ call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %cache.i)
+ call void @llvm.memset.p0.i32(ptr nonnull align 2 dereferenceable(4) %cache.i, i8 0, i32 4, i1 false)
+ call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %cache.i)
+ ret void
+}
+
+define void @replace_int_memset_test() {
+; CHECK-LABEL: define void @replace_int_memset_test() {
+; CHECK-NEXT: [[ACCUM_I_FLAT:%.*]] = alloca [1 x i32], align 4
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[ACCUM_I_FLAT]], i32 0
+; CHECK-NEXT: store i32 0, ptr [[GEP]], align 4
+; CHECK-NEXT: ret void
+;
+ %accum.i.flat = alloca [1 x i32], align 4
+ call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %accum.i.flat)
+ call void @llvm.memset.p0.i32(ptr nonnull align 4 dereferenceable(8) %accum.i.flat, i8 0, i32 4, i1 false)
+ call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %accum.i.flat)
+ ret void
+}
diff --git a/llvm/test/CodeGen/DirectX/llc-pipeline.ll b/llvm/test/CodeGen/DirectX/llc-pipeline.ll
index b1bd9f16f4efa..6b4a39a6d49ad 100644
--- a/llvm/test/CodeGen/DirectX/llc-pipeline.ll
+++ b/llvm/test/CodeGen/DirectX/llc-pipeline.ll
@@ -13,7 +13,6 @@
; CHECK-OBJ-NEXT: Create Garbage Collector Module Metadata
; CHECK-NEXT: ModulePass Manager
-; CHECK-NEXT: DXIL Finalize Linkage
; CHECK-NEXT: DXIL Intrinsic Expansion
; CHECK-NEXT: DXIL CBuffer Access
; CHECK-NEXT: DXIL Data Scalarization
@@ -23,6 +22,7 @@
; CHECK-NEXT: Dominator Tree Construction
; CHECK-NEXT: Scalarize vector operations
; CHECK-NEXT: DXIL Legalizer
+; CHECK-NEXT: DXIL Finalize Linkage
; CHECK-NEXT: DXIL Resource Binding Analysis
; CHECK-NEXT: DXIL Module Metadata analysis
; CHECK-NEXT: DXIL Shader Flag Analysis
|
395ba04
to
9f65060
Compare
@@ -106,6 +105,7 @@ class DirectXPassConfig : public TargetPassConfig { | |||
DxilScalarOptions.ScalarizeLoadStore = true; | |||
addPass(createScalarizerPass(DxilScalarOptions)); | |||
addPass(createDXILLegalizeLegacyPass()); | |||
addPass(createDXILFinalizeLinkageLegacyPass()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
the legalize legacy pass is a function level pass so it leaves behind the intrinsic decleration in the module. finalize linkage has a cleanup that will remove these dead intrinsics declarations so doing this after legalization works better for cleanup purposes.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This seems fine, but do note that if we do #134260 then we can't rely on this ordering.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If we do that we can make legalization a module level pass and then we can do the cleanup of declares without FinalizeLinkage.
7d34082
to
fb0069e
Compare
The HLSL Test Github action failures are because of:
This change doesn't touch that so will likely get fixed the next time i rebase. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks good. A few minor comments.
ConstantInt *SizeCI, | ||
DenseMap<Value *, Value *> &ReplacedValues) { | ||
LLVMContext &Ctx = Builder.getContext(); | ||
[[maybe_unused]] DataLayout DL = |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should probably be const DataLayout &
- we don't usually copy these.
// Note for i8 replacements if we know them we should use them. | ||
// Further if this is a constant ReplacedValues will return null | ||
// so we will stick to TypedVal = Val | ||
if (ReplacedValues[Val]) | ||
TypedVal = ReplacedValues[Val]; | ||
// This case Val is a ConstantInt so the cast folds away. | ||
// However if we don't do the cast the store below ends up being | ||
// an i8. | ||
else | ||
TypedVal = Builder.CreateIntCast(Val, ElemTy, false); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I find the comment block before the else
kind of confusing. Maybe use braces and move the comments into the two blocks?
// Note for i8 replacements if we know them we should use them. | |
// Further if this is a constant ReplacedValues will return null | |
// so we will stick to TypedVal = Val | |
if (ReplacedValues[Val]) | |
TypedVal = ReplacedValues[Val]; | |
// This case Val is a ConstantInt so the cast folds away. | |
// However if we don't do the cast the store below ends up being | |
// an i8. | |
else | |
TypedVal = Builder.CreateIntCast(Val, ElemTy, false); | |
if (ReplacedValues[Val]) { | |
// Note for i8 replacements if we know them we should use them. Further if | |
// this is a constant ReplacedValues will return null so we will stick to | |
// TypedVal = Val | |
TypedVal = ReplacedValues[Val]; | |
} else { | |
// This case Val is a ConstantInt so the cast folds away. However if we | |
// don't do the cast the store below ends up being an i8. | |
TypedVal = Builder.CreateIntCast(Val, ElemTy, false); | |
} |
if (CallInst *CI = dyn_cast<CallInst>(&I)) { | ||
Intrinsic::ID ID = CI->getIntrinsicID(); | ||
if (ID == Intrinsic::memset) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd probably use early exits instead of nesting (Though in this case the block is small so I don't feel too strongly about it)
[[maybe_unused]] ConstantInt *Size = | ||
dyn_cast<ConstantInt>(CI->getArgOperand(2)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is used in emitMemsetExpansion
, so we don't need the [[maybe_unused]]
@@ -106,6 +105,7 @@ class DirectXPassConfig : public TargetPassConfig { | |||
DxilScalarOptions.ScalarizeLoadStore = true; | |||
addPass(createScalarizerPass(DxilScalarOptions)); | |||
addPass(createDXILLegalizeLegacyPass()); | |||
addPass(createDXILFinalizeLinkageLegacyPass()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This seems fine, but do note that if we do #134260 then we can't rely on this ordering.
fixes llvm#136243 This change converts memset into a series of geps and stores It is intentionally limited to memsets of fixed size It also converts the byte stores to type stores. DXIL does not support i8 plus this reduces the total number of gep and store instructions. This change also moves DXILFinalizeLinkage to run after Legalization to clean up any dead intrinsic definitions.
…we want to do i8 load store cleanup later.
aa21c62
to
53690ba
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: The comment above the for-loop in upcastI8AllocasAndUses is no longer accurate. The for-loop not only gathers all cast targets, it now also gathers memsets
llvm-project/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
Lines 178 to 179 in 53690ba
// Gather all cast targets | |
for (User *U : AI->users()) { |
@@ -239,6 +247,77 @@ downcastI64toI32InsertExtractElements(Instruction &I, | |||
} | |||
} | |||
|
|||
void emitMemsetExpansion(IRBuilder<> &Builder, Value *Dst, Value *Val, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should this function also be static?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. Just had some comments on minor/nitpicky issues
…sics This moves the responsibility for cleaning up dead intrinsics from DXILFinalizeLinkage to DXILOpLowering, and moves DXILFinalizeLinkage back to it's pre-llvm#136244 place in the pipeline. Doing this avoids issues with DXIL passes running on obviously dead code, and makes it more clear what DXILFinalizeLinkage is really doing. This also helps with the story for llvm#134260, as cleaning up dead intrinsics doesn't make sense if this becomes a more generic pass. Note that test/CodeGen/DirectX/remove-dead-intriniscs.ll already covers most of the testing here. It'd be nice to have something that catches the regression from changing the pass ordering but I couldn't come up with anything that wouldn't be incredibly fragile. Fixes llvm#138180.
…sics (#138199) This moves the responsibility for cleaning up dead intrinsics from DXILFinalizeLinkage to DXILOpLowering, and moves DXILFinalizeLinkage back to it's pre-#136244 place in the pipeline. Doing this avoids issues with DXIL passes running on obviously dead code, and makes it more clear what DXILFinalizeLinkage is really doing. This also helps with the story for #134260, as cleaning up dead intrinsics doesn't make sense if this becomes a more generic pass. Note that test/CodeGen/DirectX/remove-dead-intriniscs.ll already covers most of the testing here. It'd be nice to have something that catches the regression from changing the pass ordering but I couldn't come up with anything that wouldn't be incredibly fragile. Fixes #138180.
fixes llvm#136243 This change converts memset into a series of geps and stores It is intentionally limited to memsets of fixed size It also converts the byte stores to type stores. DXIL does not support i8 plus this reduces the total number of gep and store instructions. This change also moves DXILFinalizeLinkage to run after Legalization to clean up any dead intrinsic definitions.
…sics (llvm#138199) This moves the responsibility for cleaning up dead intrinsics from DXILFinalizeLinkage to DXILOpLowering, and moves DXILFinalizeLinkage back to it's pre-llvm#136244 place in the pipeline. Doing this avoids issues with DXIL passes running on obviously dead code, and makes it more clear what DXILFinalizeLinkage is really doing. This also helps with the story for llvm#134260, as cleaning up dead intrinsics doesn't make sense if this becomes a more generic pass. Note that test/CodeGen/DirectX/remove-dead-intriniscs.ll already covers most of the testing here. It'd be nice to have something that catches the regression from changing the pass ordering but I couldn't come up with anything that wouldn't be incredibly fragile. Fixes llvm#138180.
fixes llvm#136243 This change converts memset into a series of geps and stores It is intentionally limited to memsets of fixed size It also converts the byte stores to type stores. DXIL does not support i8 plus this reduces the total number of gep and store instructions. This change also moves DXILFinalizeLinkage to run after Legalization to clean up any dead intrinsic definitions.
…sics (llvm#138199) This moves the responsibility for cleaning up dead intrinsics from DXILFinalizeLinkage to DXILOpLowering, and moves DXILFinalizeLinkage back to it's pre-llvm#136244 place in the pipeline. Doing this avoids issues with DXIL passes running on obviously dead code, and makes it more clear what DXILFinalizeLinkage is really doing. This also helps with the story for llvm#134260, as cleaning up dead intrinsics doesn't make sense if this becomes a more generic pass. Note that test/CodeGen/DirectX/remove-dead-intriniscs.ll already covers most of the testing here. It'd be nice to have something that catches the regression from changing the pass ordering but I couldn't come up with anything that wouldn't be incredibly fragile. Fixes llvm#138180.
fixes llvm#136243 This change converts memset into a series of geps and stores It is intentionally limited to memsets of fixed size It also converts the byte stores to type stores. DXIL does not support i8 plus this reduces the total number of gep and store instructions. This change also moves DXILFinalizeLinkage to run after Legalization to clean up any dead intrinsic definitions.
…sics (llvm#138199) This moves the responsibility for cleaning up dead intrinsics from DXILFinalizeLinkage to DXILOpLowering, and moves DXILFinalizeLinkage back to it's pre-llvm#136244 place in the pipeline. Doing this avoids issues with DXIL passes running on obviously dead code, and makes it more clear what DXILFinalizeLinkage is really doing. This also helps with the story for llvm#134260, as cleaning up dead intrinsics doesn't make sense if this becomes a more generic pass. Note that test/CodeGen/DirectX/remove-dead-intriniscs.ll already covers most of the testing here. It'd be nice to have something that catches the regression from changing the pass ordering but I couldn't come up with anything that wouldn't be incredibly fragile. Fixes llvm#138180.
fixes llvm#136243 This change converts memset into a series of geps and stores It is intentionally limited to memsets of fixed size It also converts the byte stores to type stores. DXIL does not support i8 plus this reduces the total number of gep and store instructions. This change also moves DXILFinalizeLinkage to run after Legalization to clean up any dead intrinsic definitions.
…sics (llvm#138199) This moves the responsibility for cleaning up dead intrinsics from DXILFinalizeLinkage to DXILOpLowering, and moves DXILFinalizeLinkage back to it's pre-llvm#136244 place in the pipeline. Doing this avoids issues with DXIL passes running on obviously dead code, and makes it more clear what DXILFinalizeLinkage is really doing. This also helps with the story for llvm#134260, as cleaning up dead intrinsics doesn't make sense if this becomes a more generic pass. Note that test/CodeGen/DirectX/remove-dead-intriniscs.ll already covers most of the testing here. It'd be nice to have something that catches the regression from changing the pass ordering but I couldn't come up with anything that wouldn't be incredibly fragile. Fixes llvm#138180.
This reverts commit 02e316c.
fixes llvm#136243 This change converts memset into a series of geps and stores It is intentionally limited to memsets of fixed size It also converts the byte stores to type stores. DXIL does not support i8 plus this reduces the total number of gep and store instructions. This change also moves DXILFinalizeLinkage to run after Legalization to clean up any dead intrinsic definitions.
fixes #136243
This change converts memset into a series of geps and stores It is intentionally limited to memsets of fixed size It also converts the byte stores to type stores.
DXIL does not support i8 plus this reduces the total number of gep and store instructions.
This change also moves DXILFinalizeLinkage to run after Legalization to clean up any dead intrinsic definitions.