Skip to content

Commit e975ff0

Browse files
committed
[X86] matchAddressRecursively - don't fold zext(shl(x,c)) -> shl(zext(x),c) if the pattern has multiple uses
Fixes the #97533 crash, where the root node still referenced the original zext node, which we then deleted. Hopefully I can come up with a better solution, but the codegen changes don't look too bad at the moment (the shift is pulled out of some complex LEA nodes that shared the scaled index).
1 parent 8270485 commit e975ff0

File tree

4 files changed

+47
-13
lines changed

4 files changed

+47
-13
lines changed

llvm/lib/Target/X86/X86ISelDAGToDAG.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -2745,7 +2745,7 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
27452745
Src = Src.getOperand(0);
27462746
}
27472747

2748-
if (Src.getOpcode() == ISD::SHL && Src.hasOneUse()) {
2748+
if (Src.getOpcode() == ISD::SHL && Src.hasOneUse() && N->hasOneUse()) {
27492749
// Give up if the shift is not a valid scale factor [1,2,3].
27502750
SDValue ShlSrc = Src.getOperand(0);
27512751
SDValue ShlAmt = Src.getOperand(1);

llvm/test/CodeGen/X86/addr-mode-matcher-3.ll

+28
Original file line numberDiff line numberDiff line change
@@ -70,3 +70,31 @@ define i32 @mask_offset_scale_zext_i32_i64(ptr %base, i32 %i) {
7070
%load = load i32, ptr %arrayidx, align 4
7171
ret i32 %load
7272
}
73+
74+
; PR97533 - multiple uses of shl node (add + gep) in the same dependency chain.
75+
define i64 @add_shl_zext(ptr %ptr, i8 %arg) nounwind {
76+
; X86-LABEL: add_shl_zext:
77+
; X86: # %bb.0:
78+
; X86-NEXT: pushl %esi
79+
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
80+
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
81+
; X86-NEXT: movl 4(%esi,%ecx,4), %edx
82+
; X86-NEXT: leal (,%ecx,8), %eax
83+
; X86-NEXT: addl (%esi,%ecx,4), %eax
84+
; X86-NEXT: adcl $0, %edx
85+
; X86-NEXT: popl %esi
86+
; X86-NEXT: retl
87+
;
88+
; X64-LABEL: add_shl_zext:
89+
; X64: # %bb.0:
90+
; X64-NEXT: movzbl %sil, %eax
91+
; X64-NEXT: shll $3, %eax
92+
; X64-NEXT: addq (%rdi,%rax), %rax
93+
; X64-NEXT: retq
94+
%idx = zext i8 %arg to i64
95+
%gep = getelementptr ptr, ptr %ptr, i64 %idx
96+
%val = load i64, ptr %gep, align 8
97+
%shl = shl i64 %idx, 3
98+
%sum = add i64 %val, %shl
99+
ret i64 %sum
100+
}

llvm/test/CodeGen/X86/sttni.ll

+12-8
Original file line numberDiff line numberDiff line change
@@ -341,9 +341,10 @@ define i32 @pcmpestri_reg_diff_i16(<8 x i16> %lhs, i32 %lhs_len, <8 x i16> %rhs,
341341
; X64-NEXT: .LBB8_2: # %compare
342342
; X64-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
343343
; X64-NEXT: andl $7, %ecx
344-
; X64-NEXT: movzwl -24(%rsp,%rcx,2), %eax
344+
; X64-NEXT: addl %ecx, %ecx
345+
; X64-NEXT: movzwl -24(%rsp,%rcx), %eax
345346
; X64-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp)
346-
; X64-NEXT: subw -40(%rsp,%rcx,2), %ax
347+
; X64-NEXT: subw -40(%rsp,%rcx), %ax
347348
; X64-NEXT: movzwl %ax, %eax
348349
; X64-NEXT: retq
349350
entry:
@@ -481,9 +482,10 @@ define i32 @pcmpestri_mem_diff_i16(ptr %lhs_ptr, i32 %lhs_len, ptr %rhs_ptr, i32
481482
; X64-NEXT: .LBB11_2: # %compare
482483
; X64-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp)
483484
; X64-NEXT: andl $7, %ecx
484-
; X64-NEXT: movzwl -24(%rsp,%rcx,2), %eax
485+
; X64-NEXT: addl %ecx, %ecx
486+
; X64-NEXT: movzwl -24(%rsp,%rcx), %eax
485487
; X64-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
486-
; X64-NEXT: subw -40(%rsp,%rcx,2), %ax
488+
; X64-NEXT: subw -40(%rsp,%rcx), %ax
487489
; X64-NEXT: movzwl %ax, %eax
488490
; X64-NEXT: retq
489491
entry:
@@ -795,9 +797,10 @@ define i32 @pcmpistri_reg_diff_i16(<8 x i16> %lhs, <8 x i16> %rhs) nounwind {
795797
; X64-NEXT: .LBB20_2: # %compare
796798
; X64-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
797799
; X64-NEXT: andl $7, %ecx
798-
; X64-NEXT: movzwl -24(%rsp,%rcx,2), %eax
800+
; X64-NEXT: addl %ecx, %ecx
801+
; X64-NEXT: movzwl -24(%rsp,%rcx), %eax
799802
; X64-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp)
800-
; X64-NEXT: subw -40(%rsp,%rcx,2), %ax
803+
; X64-NEXT: subw -40(%rsp,%rcx), %ax
801804
; X64-NEXT: movzwl %ax, %eax
802805
; X64-NEXT: retq
803806
entry:
@@ -915,9 +918,10 @@ define i32 @pcmpistri_mem_diff_i16(ptr %lhs_ptr, ptr %rhs_ptr) nounwind {
915918
; X64-NEXT: .LBB23_2: # %compare
916919
; X64-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp)
917920
; X64-NEXT: andl $7, %ecx
918-
; X64-NEXT: movzwl -24(%rsp,%rcx,2), %eax
921+
; X64-NEXT: addl %ecx, %ecx
922+
; X64-NEXT: movzwl -24(%rsp,%rcx), %eax
919923
; X64-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
920-
; X64-NEXT: subw -40(%rsp,%rcx,2), %ax
924+
; X64-NEXT: subw -40(%rsp,%rcx), %ax
921925
; X64-NEXT: movzwl %ax, %eax
922926
; X64-NEXT: retq
923927
entry:

llvm/test/CodeGen/X86/var-permute-128.ll

+6-4
Original file line numberDiff line numberDiff line change
@@ -1108,8 +1108,9 @@ define void @indices_convert() {
11081108
; SSE3-NEXT: movaps %xmm0, -56(%rsp)
11091109
; SSE3-NEXT: movaps %xmm0, -72(%rsp)
11101110
; SSE3-NEXT: andl $3, %eax
1111-
; SSE3-NEXT: movsd -72(%rsp,%rax,8), %xmm0 # xmm0 = mem[0],zero
1112-
; SSE3-NEXT: movsd -40(%rsp,%rax,8), %xmm1 # xmm1 = mem[0],zero
1111+
; SSE3-NEXT: shll $3, %eax
1112+
; SSE3-NEXT: movsd -72(%rsp,%rax), %xmm0 # xmm0 = mem[0],zero
1113+
; SSE3-NEXT: movsd -40(%rsp,%rax), %xmm1 # xmm1 = mem[0],zero
11131114
; SSE3-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
11141115
; SSE3-NEXT: movups %xmm1, (%rax)
11151116
; SSE3-NEXT: retq
@@ -1123,8 +1124,9 @@ define void @indices_convert() {
11231124
; SSSE3-NEXT: movaps %xmm0, -56(%rsp)
11241125
; SSSE3-NEXT: movaps %xmm0, -72(%rsp)
11251126
; SSSE3-NEXT: andl $3, %eax
1126-
; SSSE3-NEXT: movsd -72(%rsp,%rax,8), %xmm0 # xmm0 = mem[0],zero
1127-
; SSSE3-NEXT: movsd -40(%rsp,%rax,8), %xmm1 # xmm1 = mem[0],zero
1127+
; SSSE3-NEXT: shll $3, %eax
1128+
; SSSE3-NEXT: movsd -72(%rsp,%rax), %xmm0 # xmm0 = mem[0],zero
1129+
; SSSE3-NEXT: movsd -40(%rsp,%rax), %xmm1 # xmm1 = mem[0],zero
11281130
; SSSE3-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
11291131
; SSSE3-NEXT: movups %xmm1, (%rax)
11301132
; SSSE3-NEXT: retq

0 commit comments

Comments
 (0)