@@ -763,6 +763,75 @@ define <16 x i8> @combine_and_pshufb_or_pshufb(<16 x i8> %a0, <16 x i8> %a1) {
763
763
ret <16 x i8 > %4
764
764
}
765
765
766
+ define <16 x i8 > @combine_lshr_pshufb (<4 x i32 > %a0 ) { ; lshr by per-lane constants followed by a byte shuffle; the SSE and AVX1 checks expect one pshufb, the AVX2 and AVX512F checks a vpsrlvd plus vpshufb
767
+ ; SSE-LABEL: combine_lshr_pshufb:
768
+ ; SSE: # %bb.0:
769
+ ; SSE-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,xmm0[3,5,6,7,4,10,11],zero,xmm0[9,14,15],zero,zero
770
+ ; SSE-NEXT: retq
771
+ ;
772
+ ; AVX1-LABEL: combine_lshr_pshufb:
773
+ ; AVX1: # %bb.0:
774
+ ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,xmm0[3,5,6,7,4,10,11],zero,xmm0[9,14,15],zero,zero
775
+ ; AVX1-NEXT: retq
776
+ ;
777
+ ; AVX2-LABEL: combine_lshr_pshufb:
778
+ ; AVX2: # %bb.0:
779
+ ; AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
780
+ ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,2,3,0,5,6,7,4,9,10,11,8,12,13,14,15]
781
+ ; AVX2-NEXT: retq
782
+ ;
783
+ ; AVX512F-LABEL: combine_lshr_pshufb:
784
+ ; AVX512F: # %bb.0:
785
+ ; AVX512F-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
786
+ ; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,2,3,0,5,6,7,4,9,10,11,8,12,13,14,15]
787
+ ; AVX512F-NEXT: retq
788
+ %shr = lshr <4 x i32 > %a0 , <i32 24 , i32 0 , i32 8 , i32 16 > ; every shift count is a whole number of bytes (24, 0, 8, 16 bits)
789
+ %bc = bitcast <4 x i32 > %shr to <16 x i8 > ; view the four shifted i32 lanes as sixteen bytes
790
+ %shuffle = shufflevector <16 x i8 > %bc , <16 x i8 > poison, <16 x i32 > <i32 1 , i32 2 , i32 3 , i32 0 , i32 5 , i32 6 , i32 7 , i32 4 , i32 9 , i32 10 , i32 11 , i32 8 , i32 12 , i32 13 , i32 14 , i32 15 > ; bytes 0-11 are rotated in groups of four (1,2,3,0); bytes 12-15 stay in place
791
+ ret <16 x i8 > %shuffle
792
+ }
793
+
794
+ define <16 x i8 > @combine_shl_pshufb (<4 x i32 > %a0 ) { ; shl by per-lane constants followed by a byte shuffle; every check block below still expects a separate multiply or variable shift ahead of the pshufb
795
+ ; SSSE3-LABEL: combine_shl_pshufb:
796
+ ; SSSE3: # %bb.0:
797
+ ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
798
+ ; SSSE3-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
799
+ ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
800
+ ; SSSE3-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
801
+ ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
802
+ ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
803
+ ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,2,3,0,5,6,7,4,9,10,11,8,12,13,14,15]
804
+ ; SSSE3-NEXT: retq
805
+ ;
806
+ ; SSE41-LABEL: combine_shl_pshufb:
807
+ ; SSE41: # %bb.0:
808
+ ; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
809
+ ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,2,3,0,5,6,7,4,9,10,11,8,12,13,14,15]
810
+ ; SSE41-NEXT: retq
811
+ ;
812
+ ; AVX1-LABEL: combine_shl_pshufb:
813
+ ; AVX1: # %bb.0:
814
+ ; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
815
+ ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,2,3,0,5,6,7,4,9,10,11,8,12,13,14,15]
816
+ ; AVX1-NEXT: retq
817
+ ;
818
+ ; AVX2-LABEL: combine_shl_pshufb:
819
+ ; AVX2: # %bb.0:
820
+ ; AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
821
+ ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,2,3,0,5,6,7,4,9,10,11,8,12,13,14,15]
822
+ ; AVX2-NEXT: retq
823
+ ;
824
+ ; AVX512F-LABEL: combine_shl_pshufb:
825
+ ; AVX512F: # %bb.0:
826
+ ; AVX512F-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
827
+ ; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,2,3,0,5,6,7,4,9,10,11,8,12,13,14,15]
828
+ ; AVX512F-NEXT: retq
829
+ %shl = shl <4 x i32 > %a0 , <i32 0 , i32 8 , i32 16 , i32 16 > ; every shift count is a whole number of bytes (0, 8, 16, 16 bits)
830
+ %bc = bitcast <4 x i32 > %shl to <16 x i8 > ; view the four shifted i32 lanes as sixteen bytes
831
+ %shuffle = shufflevector <16 x i8 > %bc , <16 x i8 > poison, <16 x i32 > <i32 1 , i32 2 , i32 3 , i32 0 , i32 5 , i32 6 , i32 7 , i32 4 , i32 9 , i32 10 , i32 11 , i32 8 , i32 12 , i32 13 , i32 14 , i32 15 > ; bytes 0-11 are rotated in groups of four (1,2,3,0); bytes 12-15 stay in place
832
+ ret <16 x i8 > %shuffle
833
+ }
834
+
766
835
define <16 x i8 > @constant_fold_pshufb () {
767
836
; SSE-LABEL: constant_fold_pshufb:
768
837
; SSE: # %bb.0:
0 commit comments