@@ -7,141 +7,138 @@ define void @issue63986(i64 %0, i64 %idxprom, ptr inreg %ptr) {
7
7
; CHECK-LABEL: issue63986:
8
8
; CHECK: ; %bb.0: ; %entry
9
9
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10
- ; CHECK-NEXT: v_lshlrev_b64 v[8:9], 6, v[2:3]
11
- ; CHECK-NEXT: v_mov_b32_e32 v4, s17
12
- ; CHECK-NEXT: v_add_co_u32_e32 v10, vcc, s16, v8
13
- ; CHECK-NEXT: v_addc_co_u32_e32 v11, vcc, v4, v9, vcc
14
- ; CHECK-NEXT: ; %bb.1: ; %entry.loop-memcpy-expansion_crit_edge
15
- ; CHECK-NEXT: v_mov_b32_e32 v4, 0
16
- ; CHECK-NEXT: v_mov_b32_e32 v5, 0
17
- ; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[4:5]
10
+ ; CHECK-NEXT: v_lshlrev_b64 v[4:5], 6, v[2:3]
11
+ ; CHECK-NEXT: v_mov_b32_e32 v6, s17
12
+ ; CHECK-NEXT: v_add_co_u32_e32 v8, vcc, s16, v4
13
+ ; CHECK-NEXT: v_addc_co_u32_e32 v9, vcc, v6, v5, vcc
18
14
; CHECK-NEXT: s_mov_b64 s[4:5], 0
19
- ; CHECK-NEXT: s_waitcnt vmcnt(0)
20
- ; CHECK-NEXT: .LBB0_2: ; %loop-memcpy-expansion
15
+ ; CHECK-NEXT: .LBB0_1: ; %loop-memcpy-expansion
21
16
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
22
- ; CHECK-NEXT: v_add_co_u32_e32 v12, vcc, s4, v10
17
+ ; CHECK-NEXT: v_mov_b32_e32 v7, s5
18
+ ; CHECK-NEXT: v_mov_b32_e32 v6, s4
19
+ ; CHECK-NEXT: flat_load_dwordx4 v[10:13], v[6:7]
20
+ ; CHECK-NEXT: v_add_co_u32_e32 v6, vcc, s4, v8
23
21
; CHECK-NEXT: s_add_u32 s4, s4, 16
24
- ; CHECK-NEXT: v_mov_b32_e32 v13, s5
25
22
; CHECK-NEXT: s_addc_u32 s5, s5, 0
26
23
; CHECK-NEXT: v_cmp_ge_u64_e64 s[6:7], s[4:5], 32
27
- ; CHECK-NEXT: v_addc_co_u32_e32 v13 , vcc, v11, v13 , vcc
24
+ ; CHECK-NEXT: v_addc_co_u32_e32 v7 , vcc, v9, v7 , vcc
28
25
; CHECK-NEXT: s_and_b64 vcc, exec, s[6:7]
29
- ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
30
- ; CHECK-NEXT: flat_store_dwordx4 v[12:13], v[4:7]
31
- ; CHECK-NEXT: s_cbranch_vccz .LBB0_2
32
- ; CHECK-NEXT: ; %bb.3: ; %loop-memcpy-residual-header
26
+ ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
27
+ ; CHECK-NEXT: flat_store_dwordx4 v[6:7], v[10:13]
28
+ ; CHECK-NEXT: s_cbranch_vccz .LBB0_1
29
+ ; CHECK-NEXT: ; %bb.2: ; %loop-memcpy-residual-header
30
+ ; CHECK-NEXT: s_branch .LBB0_4
31
+ ; CHECK-NEXT: ; %bb.3:
32
+ ; CHECK-NEXT: ; implicit-def: $vgpr6_vgpr7
33
33
; CHECK-NEXT: s_branch .LBB0_5
34
- ; CHECK-NEXT: ; %bb.4:
35
- ; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3
36
- ; CHECK-NEXT: s_branch .LBB0_6
37
- ; CHECK-NEXT: .LBB0_5: ; %loop-memcpy-residual-header.post-loop-memcpy-expansion_crit_edge
38
- ; CHECK-NEXT: v_lshlrev_b64 v[2:3], 6, v[2:3]
39
- ; CHECK-NEXT: s_cbranch_execnz .LBB0_9
40
- ; CHECK-NEXT: .LBB0_6: ; %loop-memcpy-residual-header.loop-memcpy-residual_crit_edge
41
- ; CHECK-NEXT: v_mov_b32_e32 v2, 0
42
- ; CHECK-NEXT: v_mov_b32_e32 v3, 0
43
- ; CHECK-NEXT: flat_load_ubyte v2, v[2:3]
44
- ; CHECK-NEXT: s_add_u32 s6, s16, 32
45
- ; CHECK-NEXT: s_addc_u32 s4, s17, 0
46
- ; CHECK-NEXT: v_mov_b32_e32 v4, s4
47
- ; CHECK-NEXT: v_add_co_u32_e32 v3, vcc, s6, v8
34
+ ; CHECK-NEXT: .LBB0_4: ; %loop-memcpy-residual-header.post-loop-memcpy-expansion_crit_edge
35
+ ; CHECK-NEXT: v_lshlrev_b64 v[6:7], 6, v[2:3]
36
+ ; CHECK-NEXT: s_cbranch_execnz .LBB0_8
37
+ ; CHECK-NEXT: .LBB0_5: ; %loop-memcpy-residual.preheader
38
+ ; CHECK-NEXT: s_add_u32 s4, s16, 32
39
+ ; CHECK-NEXT: s_addc_u32 s5, s17, 0
40
+ ; CHECK-NEXT: v_mov_b32_e32 v3, s5
41
+ ; CHECK-NEXT: v_add_co_u32_e32 v2, vcc, s4, v4
42
+ ; CHECK-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v5, vcc
48
43
; CHECK-NEXT: s_mov_b64 s[4:5], 0
49
- ; CHECK-NEXT: v_addc_co_u32_e32 v4, vcc, v4, v9, vcc
50
- ; CHECK-NEXT: s_waitcnt vmcnt(0)
51
- ; CHECK-NEXT: ; %bb.7: ; %loop-memcpy-residual
52
- ; CHECK-NEXT: v_mov_b32_e32 v6, s5
53
- ; CHECK-NEXT: v_add_co_u32_e32 v5, vcc, s4, v3
44
+ ; CHECK-NEXT: ; %bb.6: ; %loop-memcpy-residual
45
+ ; CHECK-NEXT: s_add_u32 s6, 32, s4
46
+ ; CHECK-NEXT: s_addc_u32 s7, 0, s5
47
+ ; CHECK-NEXT: v_mov_b32_e32 v6, s6
48
+ ; CHECK-NEXT: v_mov_b32_e32 v7, s7
49
+ ; CHECK-NEXT: flat_load_ubyte v10, v[6:7]
50
+ ; CHECK-NEXT: v_mov_b32_e32 v7, s5
51
+ ; CHECK-NEXT: v_add_co_u32_e32 v6, vcc, s4, v2
52
+ ; CHECK-NEXT: v_addc_co_u32_e32 v7, vcc, v3, v7, vcc
54
53
; CHECK-NEXT: s_add_u32 s4, s4, 1
55
- ; CHECK-NEXT: v_addc_co_u32_e32 v6, vcc, v4, v6, vcc
56
54
; CHECK-NEXT: s_addc_u32 s5, s5, 0
57
- ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
58
- ; CHECK-NEXT: flat_store_byte v[5:6 ], v2
59
- ; CHECK-NEXT: ; %bb.8 :
60
- ; CHECK-NEXT: v_mov_b32_e32 v2, v8
61
- ; CHECK-NEXT: v_mov_b32_e32 v3, v9
62
- ; CHECK-NEXT: .LBB0_9 : ; %post-loop-memcpy-expansion
63
- ; CHECK-NEXT: v_and_b32_e32 v6 , 15, v0
55
+ ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
56
+ ; CHECK-NEXT: flat_store_byte v[6:7 ], v10
57
+ ; CHECK-NEXT: ; %bb.7 :
58
+ ; CHECK-NEXT: v_mov_b32_e32 v7, v5
59
+ ; CHECK-NEXT: v_mov_b32_e32 v6, v4
60
+ ; CHECK-NEXT: .LBB0_8 : ; %post-loop-memcpy-expansion
61
+ ; CHECK-NEXT: v_and_b32_e32 v2 , 15, v0
64
62
; CHECK-NEXT: v_and_b32_e32 v0, -16, v0
65
- ; CHECK-NEXT: v_add_co_u32_e32 v2 , vcc, v2 , v0
66
- ; CHECK-NEXT: v_mov_b32_e32 v7 , 0
67
- ; CHECK-NEXT: v_addc_co_u32_e32 v3 , vcc, v3 , v1, vcc
63
+ ; CHECK-NEXT: v_add_co_u32_e32 v4 , vcc, v6 , v0
64
+ ; CHECK-NEXT: v_mov_b32_e32 v3 , 0
65
+ ; CHECK-NEXT: v_addc_co_u32_e32 v5 , vcc, v7 , v1, vcc
68
66
; CHECK-NEXT: v_cmp_ne_u64_e64 s[4:5], 0, v[0:1]
69
- ; CHECK-NEXT: v_cmp_ne_u64_e64 s[6:7], 0, v[6:7]
70
- ; CHECK-NEXT: v_mov_b32_e32 v4, s17
71
- ; CHECK-NEXT: v_mov_b32_e32 v8, 0
72
- ; CHECK-NEXT: v_add_co_u32_e32 v12, vcc, s16, v2
73
- ; CHECK-NEXT: v_mov_b32_e32 v9, 0
74
- ; CHECK-NEXT: v_addc_co_u32_e32 v13, vcc, v4, v3, vcc
75
- ; CHECK-NEXT: s_branch .LBB0_12
76
- ; CHECK-NEXT: .LBB0_10: ; %Flow14
77
- ; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1
67
+ ; CHECK-NEXT: v_cmp_ne_u64_e64 s[6:7], 0, v[2:3]
68
+ ; CHECK-NEXT: v_mov_b32_e32 v6, s17
69
+ ; CHECK-NEXT: v_add_co_u32_e32 v4, vcc, s16, v4
70
+ ; CHECK-NEXT: v_addc_co_u32_e32 v5, vcc, v6, v5, vcc
71
+ ; CHECK-NEXT: s_branch .LBB0_11
72
+ ; CHECK-NEXT: .LBB0_9: ; %Flow14
73
+ ; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1
78
74
; CHECK-NEXT: s_or_b64 exec, exec, s[10:11]
79
75
; CHECK-NEXT: s_mov_b64 s[8:9], 0
80
- ; CHECK-NEXT: .LBB0_11 : ; %Flow16
81
- ; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1
76
+ ; CHECK-NEXT: .LBB0_10 : ; %Flow16
77
+ ; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1
82
78
; CHECK-NEXT: s_andn2_b64 vcc, exec, s[8:9]
83
- ; CHECK-NEXT: s_cbranch_vccz .LBB0_20
84
- ; CHECK-NEXT: .LBB0_12 : ; %while.cond
79
+ ; CHECK-NEXT: s_cbranch_vccz .LBB0_19
80
+ ; CHECK-NEXT: .LBB0_11 : ; %while.cond
85
81
; CHECK-NEXT: ; =>This Loop Header: Depth=1
86
- ; CHECK-NEXT: ; Child Loop BB0_14 Depth 2
87
- ; CHECK-NEXT: ; Child Loop BB0_18 Depth 2
82
+ ; CHECK-NEXT: ; Child Loop BB0_13 Depth 2
83
+ ; CHECK-NEXT: ; Child Loop BB0_17 Depth 2
88
84
; CHECK-NEXT: s_and_saveexec_b64 s[8:9], s[4:5]
89
- ; CHECK-NEXT: s_cbranch_execz .LBB0_15
90
- ; CHECK-NEXT: ; %bb.13: ; %while.cond.loop-memcpy-expansion2_crit_edge
91
- ; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1
92
- ; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[8:9]
85
+ ; CHECK-NEXT: s_cbranch_execz .LBB0_14
86
+ ; CHECK-NEXT: ; %bb.12: ; %loop-memcpy-expansion2.preheader
87
+ ; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1
93
88
; CHECK-NEXT: s_mov_b64 s[10:11], 0
94
89
; CHECK-NEXT: s_mov_b64 s[12:13], 0
95
- ; CHECK-NEXT: s_waitcnt vmcnt(0)
96
- ; CHECK-NEXT: .LBB0_14: ; %loop-memcpy-expansion2
97
- ; CHECK-NEXT: ; Parent Loop BB0_12 Depth=1
90
+ ; CHECK-NEXT: .LBB0_13: ; %loop-memcpy-expansion2
91
+ ; CHECK-NEXT: ; Parent Loop BB0_11 Depth=1
98
92
; CHECK-NEXT: ; => This Inner Loop Header: Depth=2
99
- ; CHECK-NEXT: v_mov_b32_e32 v15, s13
100
- ; CHECK-NEXT: v_add_co_u32_e32 v14, vcc, s12, v10
93
+ ; CHECK-NEXT: v_mov_b32_e32 v6, s12
94
+ ; CHECK-NEXT: v_mov_b32_e32 v7, s13
95
+ ; CHECK-NEXT: flat_load_dwordx4 v[10:13], v[6:7]
96
+ ; CHECK-NEXT: v_add_co_u32_e32 v6, vcc, s12, v8
101
97
; CHECK-NEXT: s_add_u32 s12, s12, 16
102
- ; CHECK-NEXT: v_addc_co_u32_e32 v15 , vcc, v11, v15 , vcc
98
+ ; CHECK-NEXT: v_addc_co_u32_e32 v7 , vcc, v9, v7 , vcc
103
99
; CHECK-NEXT: s_addc_u32 s13, s13, 0
104
100
; CHECK-NEXT: v_cmp_ge_u64_e32 vcc, s[12:13], v[0:1]
105
- ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
106
- ; CHECK-NEXT: flat_store_dwordx4 v[14:15], v[2:5]
107
101
; CHECK-NEXT: s_or_b64 s[10:11], vcc, s[10:11]
102
+ ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
103
+ ; CHECK-NEXT: flat_store_dwordx4 v[6:7], v[10:13]
108
104
; CHECK-NEXT: s_andn2_b64 exec, exec, s[10:11]
109
- ; CHECK-NEXT: s_cbranch_execnz .LBB0_14
110
- ; CHECK-NEXT: .LBB0_15 : ; %Flow15
111
- ; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1
105
+ ; CHECK-NEXT: s_cbranch_execnz .LBB0_13
106
+ ; CHECK-NEXT: .LBB0_14 : ; %Flow15
107
+ ; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1
112
108
; CHECK-NEXT: s_or_b64 exec, exec, s[8:9]
113
109
; CHECK-NEXT: s_mov_b64 s[8:9], -1
114
- ; CHECK-NEXT: s_cbranch_execz .LBB0_11
115
- ; CHECK-NEXT: ; %bb.16 : ; %loop-memcpy-residual-header5
116
- ; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1
110
+ ; CHECK-NEXT: s_cbranch_execz .LBB0_10
111
+ ; CHECK-NEXT: ; %bb.15 : ; %loop-memcpy-residual-header5
112
+ ; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1
117
113
; CHECK-NEXT: s_and_saveexec_b64 s[8:9], s[6:7]
118
114
; CHECK-NEXT: s_xor_b64 s[10:11], exec, s[8:9]
119
- ; CHECK-NEXT: s_cbranch_execz .LBB0_10
120
- ; CHECK-NEXT: ; %bb.17: ; %loop-memcpy-residual-header5.loop-memcpy-residual4_crit_edge
121
- ; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1
122
- ; CHECK-NEXT: flat_load_ubyte v2, v[8:9]
115
+ ; CHECK-NEXT: s_cbranch_execz .LBB0_9
116
+ ; CHECK-NEXT: ; %bb.16: ; %loop-memcpy-residual4.preheader
117
+ ; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1
123
118
; CHECK-NEXT: s_mov_b64 s[12:13], 0
124
119
; CHECK-NEXT: s_mov_b64 s[14:15], 0
125
- ; CHECK-NEXT: s_waitcnt vmcnt(0)
126
- ; CHECK-NEXT: .LBB0_18: ; %loop-memcpy-residual4
127
- ; CHECK-NEXT: ; Parent Loop BB0_12 Depth=1
120
+ ; CHECK-NEXT: .LBB0_17: ; %loop-memcpy-residual4
121
+ ; CHECK-NEXT: ; Parent Loop BB0_11 Depth=1
128
122
; CHECK-NEXT: ; => This Inner Loop Header: Depth=2
129
- ; CHECK-NEXT: v_add_co_u32_e32 v3, vcc, s14, v12
123
+ ; CHECK-NEXT: v_mov_b32_e32 v10, s15
124
+ ; CHECK-NEXT: v_add_co_u32_e32 v6, vcc, s14, v0
125
+ ; CHECK-NEXT: v_addc_co_u32_e32 v7, vcc, v1, v10, vcc
126
+ ; CHECK-NEXT: flat_load_ubyte v11, v[6:7]
127
+ ; CHECK-NEXT: v_add_co_u32_e32 v6, vcc, s14, v4
130
128
; CHECK-NEXT: s_add_u32 s14, s14, 1
131
- ; CHECK-NEXT: v_mov_b32_e32 v4, s15
132
129
; CHECK-NEXT: s_addc_u32 s15, s15, 0
133
- ; CHECK-NEXT: v_cmp_ge_u64_e64 s[8:9], s[14:15], v[6:7 ]
134
- ; CHECK-NEXT: v_addc_co_u32_e32 v4 , vcc, v13, v4 , vcc
130
+ ; CHECK-NEXT: v_cmp_ge_u64_e64 s[8:9], s[14:15], v[2:3 ]
131
+ ; CHECK-NEXT: v_addc_co_u32_e32 v7 , vcc, v5, v10 , vcc
135
132
; CHECK-NEXT: s_or_b64 s[12:13], s[8:9], s[12:13]
136
- ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
137
- ; CHECK-NEXT: flat_store_byte v[3:4 ], v2
133
+ ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
134
+ ; CHECK-NEXT: flat_store_byte v[6:7 ], v11
138
135
; CHECK-NEXT: s_andn2_b64 exec, exec, s[12:13]
139
- ; CHECK-NEXT: s_cbranch_execnz .LBB0_18
140
- ; CHECK-NEXT: ; %bb.19 : ; %Flow
141
- ; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1
136
+ ; CHECK-NEXT: s_cbranch_execnz .LBB0_17
137
+ ; CHECK-NEXT: ; %bb.18 : ; %Flow
138
+ ; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1
142
139
; CHECK-NEXT: s_or_b64 exec, exec, s[12:13]
143
- ; CHECK-NEXT: s_branch .LBB0_10
144
- ; CHECK-NEXT: .LBB0_20 : ; %DummyReturnBlock
140
+ ; CHECK-NEXT: s_branch .LBB0_9
141
+ ; CHECK-NEXT: .LBB0_19 : ; %DummyReturnBlock
145
142
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
146
143
; CHECK-NEXT: s_setpc_b64 s[30:31]
147
144
entry:
0 commit comments