diff --git a/clang/test/CodeGen/SystemZ/strictfp_builtins.c b/clang/test/CodeGen/SystemZ/strictfp_builtins.c index 8c8f1f4cabd74..b60fd932d31c3 100644 --- a/clang/test/CodeGen/SystemZ/strictfp_builtins.c +++ b/clang/test/CodeGen/SystemZ/strictfp_builtins.c @@ -9,7 +9,7 @@ // CHECK-NEXT: [[F_ADDR:%.*]] = alloca float, align 4 // CHECK-NEXT: store float [[F:%.*]], ptr [[F_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F_ADDR]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.s390.tdc.f32(float [[TMP0]], i64 15) #[[ATTR2:[0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.s390.tdc.f32(float [[TMP0]], i64 15) // CHECK-NEXT: ret i32 [[TMP1]] // int test_isnan_float(float f) { @@ -21,7 +21,7 @@ int test_isnan_float(float f) { // CHECK-NEXT: [[D_ADDR:%.*]] = alloca double, align 8 // CHECK-NEXT: store double [[D:%.*]], ptr [[D_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[D_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.s390.tdc.f64(double [[TMP0]], i64 15) #[[ATTR2]] +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.s390.tdc.f64(double [[TMP0]], i64 15) // CHECK-NEXT: ret i32 [[TMP1]] // int test_isnan_double(double d) { @@ -34,7 +34,7 @@ int test_isnan_double(double d) { // CHECK-NEXT: [[LD:%.*]] = load fp128, ptr [[TMP0:%.*]], align 8 // CHECK-NEXT: store fp128 [[LD]], ptr [[LD_ADDR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load fp128, ptr [[LD_ADDR]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.s390.tdc.f128(fp128 [[TMP1]], i64 15) #[[ATTR2]] +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.s390.tdc.f128(fp128 [[TMP1]], i64 15) // CHECK-NEXT: ret i32 [[TMP2]] // int test_isnan_long_double(long double ld) { @@ -46,7 +46,7 @@ int test_isnan_long_double(long double ld) { // CHECK-NEXT: [[F_ADDR:%.*]] = alloca float, align 4 // CHECK-NEXT: store float [[F:%.*]], ptr [[F_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F_ADDR]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.s390.tdc.f32(float [[TMP0]], i64 48) #[[ATTR2]] +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.s390.tdc.f32(float [[TMP0]], i64 48) // CHECK-NEXT: ret i32 [[TMP1]] // int test_isinf_float(float f) { @@ -58,7 +58,7 @@ int test_isinf_float(float f) { // CHECK-NEXT: [[D_ADDR:%.*]] = alloca double, align 8 // CHECK-NEXT: store double [[D:%.*]], ptr [[D_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[D_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.s390.tdc.f64(double [[TMP0]], i64 48) #[[ATTR2]] +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.s390.tdc.f64(double [[TMP0]], i64 48) // CHECK-NEXT: ret i32 [[TMP1]] // int test_isinf_double(double d) { @@ -71,7 +71,7 @@ int test_isinf_double(double d) { // CHECK-NEXT: [[LD:%.*]] = load fp128, ptr [[TMP0:%.*]], align 8 // CHECK-NEXT: store fp128 [[LD]], ptr [[LD_ADDR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load fp128, ptr [[LD_ADDR]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.s390.tdc.f128(fp128 [[TMP1]], i64 48) #[[ATTR2]] +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.s390.tdc.f128(fp128 [[TMP1]], i64 48) // CHECK-NEXT: ret i32 [[TMP2]] // int test_isinf_long_double(long double ld) { @@ -83,7 +83,7 @@ int test_isinf_long_double(long double ld) { // CHECK-NEXT: [[F_ADDR:%.*]] = alloca float, align 4 // CHECK-NEXT: store float [[F:%.*]], ptr [[F_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F_ADDR]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.s390.tdc.f32(float [[TMP0]], i64 4032) #[[ATTR2]] +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.s390.tdc.f32(float [[TMP0]], i64 4032) // CHECK-NEXT: ret i32 [[TMP1]] // int test_isfinite_float(float f) { @@ -95,7 +95,7 @@ int test_isfinite_float(float f) { // CHECK-NEXT: [[D_ADDR:%.*]] = alloca double, align 8 // CHECK-NEXT: store double [[D:%.*]], ptr [[D_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[D_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.s390.tdc.f64(double [[TMP0]], i64 4032) #[[ATTR2]] +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.s390.tdc.f64(double [[TMP0]], i64 4032) // CHECK-NEXT: ret i32 [[TMP1]] // int test_isfinite_double(double d) { @@ -108,7 +108,7 @@ int test_isfinite_double(double d) { // CHECK-NEXT: [[LD:%.*]] = load fp128, ptr [[TMP0:%.*]], align 8 // CHECK-NEXT: store fp128 [[LD]], ptr [[LD_ADDR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load fp128, ptr [[LD_ADDR]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.s390.tdc.f128(fp128 [[TMP1]], i64 4032) #[[ATTR2]] +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.s390.tdc.f128(fp128 [[TMP1]], i64 4032) // CHECK-NEXT: ret i32 [[TMP2]] // int test_isfinite_long_double(long double ld) { diff --git a/clang/test/CodeGen/X86/strictfp_builtins.c b/clang/test/CodeGen/X86/strictfp_builtins.c index 43e4060bef259..c4a22dc5ee90f 100644 --- a/clang/test/CodeGen/X86/strictfp_builtins.c +++ b/clang/test/CodeGen/X86/strictfp_builtins.c @@ -27,7 +27,7 @@ void p(char *str, int x) { // CHECK-NEXT: [[LD_ADDR:%.*]] = alloca x86_fp80, align 16 // CHECK-NEXT: store x86_fp80 [[LD:%.*]], ptr [[LD_ADDR]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = load x86_fp80, ptr [[LD_ADDR]], align 16 -// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f80(x86_fp80 [[TMP0]], i32 516) #[[ATTR3]] +// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f80(x86_fp80 [[TMP0]], i32 516) // CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32 // CHECK-NEXT: call void @p(ptr noundef @.str.1, i32 noundef [[TMP2]]) #[[ATTR3]] // CHECK-NEXT: ret void @@ -43,7 +43,7 @@ void test_long_double_isinf(long double ld) { // CHECK-NEXT: [[LD_ADDR:%.*]] = alloca x86_fp80, align 16 // CHECK-NEXT: store x86_fp80 [[LD:%.*]], ptr [[LD_ADDR]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = load x86_fp80, ptr [[LD_ADDR]], align 16 -// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f80(x86_fp80 [[TMP0]], i32 504) #[[ATTR3]] +// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f80(x86_fp80 [[TMP0]], i32 504) // CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32 // CHECK-NEXT: call void @p(ptr noundef @.str.2, i32 noundef [[TMP2]]) #[[ATTR3]] // CHECK-NEXT: ret void @@ -59,7 +59,7 @@ void test_long_double_isfinite(long double ld) { // CHECK-NEXT: [[LD_ADDR:%.*]] = alloca x86_fp80, align 16 // CHECK-NEXT: store x86_fp80 [[LD:%.*]], ptr [[LD_ADDR]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = load x86_fp80, ptr [[LD_ADDR]], align 16 -// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f80(x86_fp80 [[TMP0]], i32 3) #[[ATTR3]] +// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f80(x86_fp80 [[TMP0]], i32 3) // CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32 // CHECK-NEXT: call void @p(ptr noundef @.str.3, i32 noundef [[TMP2]]) #[[ATTR3]] // CHECK-NEXT: ret void diff --git a/clang/test/CodeGen/cx-complex-range.c b/clang/test/CodeGen/cx-complex-range.c index 88300041061aa..98735c4671ce5 100644 --- a/clang/test/CodeGen/cx-complex-range.c +++ b/clang/test/CodeGen/cx-complex-range.c @@ -1575,8 +1575,8 @@ _Complex float mulf(_Complex float a, _Complex float b) { // X86WINPRMTD_STRICT-NEXT: [[B_REAL:%.*]] = load double, ptr [[B_REALP]], align 8 // X86WINPRMTD_STRICT-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[B]], i32 0, i32 1 // X86WINPRMTD_STRICT-NEXT: [[B_IMAG:%.*]] = load double, ptr [[B_IMAGP]], align 8 -// X86WINPRMTD_STRICT-NEXT: [[TMP0:%.*]] = call double @llvm.fabs.f64(double [[B_REAL]]) #[[ATTR3]] -// X86WINPRMTD_STRICT-NEXT: [[TMP1:%.*]] = call double @llvm.fabs.f64(double [[B_IMAG]]) #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP0:%.*]] = call double @llvm.fabs.f64(double [[B_REAL]]) +// X86WINPRMTD_STRICT-NEXT: [[TMP1:%.*]] = call double @llvm.fabs.f64(double [[B_IMAG]]) // X86WINPRMTD_STRICT-NEXT: [[ABS_CMP:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[TMP0]], double [[TMP1]], metadata !"ugt", metadata !"fpexcept.strict") #[[ATTR3]] // X86WINPRMTD_STRICT-NEXT: br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]] // X86WINPRMTD_STRICT: abs_rhsr_greater_or_equal_abs_rhsi: @@ -2658,8 +2658,8 @@ _Complex double muld(_Complex double a, _Complex double b) { // X86WINPRMTD_STRICT-NEXT: [[B_REAL:%.*]] = load double, ptr [[B_REALP]], align 8 // X86WINPRMTD_STRICT-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[B]], i32 0, i32 1 // X86WINPRMTD_STRICT-NEXT: [[B_IMAG:%.*]] = load double, ptr [[B_IMAGP]], align 8 -// X86WINPRMTD_STRICT-NEXT: [[TMP0:%.*]] = call double @llvm.fabs.f64(double [[B_REAL]]) #[[ATTR3]] -// X86WINPRMTD_STRICT-NEXT: [[TMP1:%.*]] = call double @llvm.fabs.f64(double [[B_IMAG]]) #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP0:%.*]] = call double @llvm.fabs.f64(double [[B_REAL]]) +// X86WINPRMTD_STRICT-NEXT: [[TMP1:%.*]] = call double @llvm.fabs.f64(double [[B_IMAG]]) // X86WINPRMTD_STRICT-NEXT: [[ABS_CMP:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[TMP0]], double [[TMP1]], metadata !"ugt", metadata !"fpexcept.strict") #[[ATTR3]] // X86WINPRMTD_STRICT-NEXT: br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]] // X86WINPRMTD_STRICT: abs_rhsr_greater_or_equal_abs_rhsi: @@ -2713,8 +2713,8 @@ _Complex double muld(_Complex double a, _Complex double b) { // PRMTD_STRICT-NEXT: [[B_REAL:%.*]] = load x86_fp80, ptr [[B_REALP]], align 16 // PRMTD_STRICT-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds nuw { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 1 // PRMTD_STRICT-NEXT: [[B_IMAG:%.*]] = load x86_fp80, ptr [[B_IMAGP]], align 16 -// PRMTD_STRICT-NEXT: [[TMP0:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 [[B_REAL]]) #[[ATTR4]] -// PRMTD_STRICT-NEXT: [[TMP1:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 [[B_IMAG]]) #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP0:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 [[B_REAL]]) +// PRMTD_STRICT-NEXT: [[TMP1:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 [[B_IMAG]]) // PRMTD_STRICT-NEXT: [[ABS_CMP:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f80(x86_fp80 [[TMP0]], x86_fp80 [[TMP1]], metadata !"ugt", metadata !"fpexcept.strict") #[[ATTR4]] // PRMTD_STRICT-NEXT: br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]] // PRMTD_STRICT: abs_rhsr_greater_or_equal_abs_rhsi: @@ -3961,8 +3961,8 @@ _Complex long double mulld(_Complex long double a, _Complex long double b) { // X86WINPRMTD_STRICT-NEXT: [[C_IMAG:%.*]] = load float, ptr [[C_IMAGP]], align 4 // X86WINPRMTD_STRICT-NEXT: [[CONV:%.*]] = call double @llvm.experimental.constrained.fpext.f64.f32(float [[C_REAL]], metadata !"fpexcept.strict") #[[ATTR3]] // X86WINPRMTD_STRICT-NEXT: [[CONV1:%.*]] = call double @llvm.experimental.constrained.fpext.f64.f32(float [[C_IMAG]], metadata !"fpexcept.strict") #[[ATTR3]] -// X86WINPRMTD_STRICT-NEXT: [[TMP0:%.*]] = call double @llvm.fabs.f64(double [[CONV]]) #[[ATTR3]] -// X86WINPRMTD_STRICT-NEXT: [[TMP1:%.*]] = call double @llvm.fabs.f64(double [[CONV1]]) #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP0:%.*]] = call double @llvm.fabs.f64(double [[CONV]]) +// X86WINPRMTD_STRICT-NEXT: [[TMP1:%.*]] = call double @llvm.fabs.f64(double [[CONV1]]) // X86WINPRMTD_STRICT-NEXT: [[ABS_CMP:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[TMP0]], double [[TMP1]], metadata !"ugt", metadata !"fpexcept.strict") #[[ATTR3]] // X86WINPRMTD_STRICT-NEXT: br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]] // X86WINPRMTD_STRICT: abs_rhsr_greater_or_equal_abs_rhsi: @@ -4038,8 +4038,8 @@ _Complex long double mulld(_Complex long double a, _Complex long double b) { // PRMTD_STRICT-NEXT: [[C_IMAG:%.*]] = load float, ptr [[C_IMAGP]], align 4 // PRMTD_STRICT-NEXT: [[CONV:%.*]] = call x86_fp80 @llvm.experimental.constrained.fpext.f80.f32(float [[C_REAL]], metadata !"fpexcept.strict") #[[ATTR4]] // PRMTD_STRICT-NEXT: [[CONV1:%.*]] = call x86_fp80 @llvm.experimental.constrained.fpext.f80.f32(float [[C_IMAG]], metadata !"fpexcept.strict") #[[ATTR4]] -// PRMTD_STRICT-NEXT: [[TMP0:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 [[CONV]]) #[[ATTR4]] -// PRMTD_STRICT-NEXT: [[TMP1:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 [[CONV1]]) #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP0:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 [[CONV]]) +// PRMTD_STRICT-NEXT: [[TMP1:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 [[CONV1]]) // PRMTD_STRICT-NEXT: [[ABS_CMP:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f80(x86_fp80 [[TMP0]], x86_fp80 [[TMP1]], metadata !"ugt", metadata !"fpexcept.strict") #[[ATTR4]] // PRMTD_STRICT-NEXT: br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]] // PRMTD_STRICT: abs_rhsr_greater_or_equal_abs_rhsi: diff --git a/clang/test/CodeGen/isfpclass.c b/clang/test/CodeGen/isfpclass.c index 1bf60b8fbca17..58849eec340c6 100644 --- a/clang/test/CodeGen/isfpclass.c +++ b/clang/test/CodeGen/isfpclass.c @@ -15,7 +15,7 @@ _Bool check_isfpclass_finite(float x) { // CHECK-LABEL: define dso_local noundef i1 @check_isfpclass_finite_strict // CHECK-SAME: (float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.is.fpclass.f32(float [[X]], i32 504) #[[ATTR5:[0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.is.fpclass.f32(float [[X]], i32 504) // CHECK-NEXT: ret i1 [[TMP0]] // _Bool check_isfpclass_finite_strict(float x) { @@ -36,7 +36,7 @@ _Bool check_isfpclass_nan_f32(float x) { // CHECK-LABEL: define dso_local noundef i1 @check_isfpclass_nan_f32_strict // CHECK-SAME: (float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.is.fpclass.f32(float [[X]], i32 3) #[[ATTR5]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.is.fpclass.f32(float [[X]], i32 3) // CHECK-NEXT: ret i1 [[TMP0]] // _Bool check_isfpclass_nan_f32_strict(float x) { @@ -57,7 +57,7 @@ _Bool check_isfpclass_snan_f64(double x) { // CHECK-LABEL: define dso_local noundef i1 @check_isfpclass_snan_f64_strict // CHECK-SAME: (double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.is.fpclass.f64(double [[X]], i32 1) #[[ATTR5]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.is.fpclass.f64(double [[X]], i32 1) // CHECK-NEXT: ret i1 [[TMP0]] // _Bool check_isfpclass_snan_f64_strict(double x) { @@ -78,7 +78,7 @@ _Bool check_isfpclass_zero_f16(_Float16 x) { // CHECK-LABEL: define dso_local noundef i1 @check_isfpclass_zero_f16_strict // CHECK-SAME: (half noundef [[X:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.is.fpclass.f16(half [[X]], i32 96) #[[ATTR5]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.is.fpclass.f16(half [[X]], i32 96) // CHECK-NEXT: ret i1 [[TMP0]] // _Bool check_isfpclass_zero_f16_strict(_Float16 x) { @@ -89,7 +89,7 @@ _Bool check_isfpclass_zero_f16_strict(_Float16 x) { // CHECK-LABEL: define dso_local noundef i1 @check_isnan // CHECK-SAME: (float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.is.fpclass.f32(float [[X]], i32 3) #[[ATTR5]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.is.fpclass.f32(float [[X]], i32 3) // CHECK-NEXT: ret i1 [[TMP0]] // _Bool check_isnan(float x) { @@ -100,7 +100,7 @@ _Bool check_isnan(float x) { // CHECK-LABEL: define dso_local noundef i1 @check_isinf // CHECK-SAME: (float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.is.fpclass.f32(float [[X]], i32 516) #[[ATTR5]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.is.fpclass.f32(float [[X]], i32 516) // CHECK-NEXT: ret i1 [[TMP0]] // _Bool check_isinf(float x) { @@ -111,7 +111,7 @@ _Bool check_isinf(float x) { // CHECK-LABEL: define dso_local noundef i1 @check_isfinite // CHECK-SAME: (float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.is.fpclass.f32(float [[X]], i32 504) #[[ATTR5]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.is.fpclass.f32(float [[X]], i32 504) // CHECK-NEXT: ret i1 [[TMP0]] // _Bool check_isfinite(float x) { @@ -122,7 +122,7 @@ _Bool check_isfinite(float x) { // CHECK-LABEL: define dso_local noundef i1 @check_isnormal // CHECK-SAME: (float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.is.fpclass.f32(float [[X]], i32 264) #[[ATTR5]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.is.fpclass.f32(float [[X]], i32 264) // CHECK-NEXT: ret i1 [[TMP0]] // _Bool check_isnormal(float x) { @@ -150,7 +150,7 @@ int4 check_isfpclass_nan_v4f32(float4 x) { // CHECK-LABEL: define dso_local range(i32 0, 2) <4 x i32> @check_isfpclass_nan_strict_v4f32 // CHECK-SAME: (<4 x float> noundef [[X:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i1> @llvm.is.fpclass.v4f32(<4 x float> [[X]], i32 3) #[[ATTR5]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i1> @llvm.is.fpclass.v4f32(<4 x float> [[X]], i32 3) // CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i1> [[TMP0]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP1]] // diff --git a/clang/test/CodeGen/strictfp-elementwise-bulitins.cpp b/clang/test/CodeGen/strictfp-elementwise-bulitins.cpp index 175ad22601839..9ba1a557a90fe 100644 --- a/clang/test/CodeGen/strictfp-elementwise-bulitins.cpp +++ b/clang/test/CodeGen/strictfp-elementwise-bulitins.cpp @@ -20,7 +20,7 @@ float4 strict_fadd(float4 a, float4 b) { // CHECK-LABEL: define dso_local noundef <4 x float> @_Z22strict_elementwise_absDv4_f // CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[ELT_ABS:%.*]] = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> [[A]]) #[[ATTR4]] +// CHECK-NEXT: [[ELT_ABS:%.*]] = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> [[A]]) // CHECK-NEXT: ret <4 x float> [[ELT_ABS]] // float4 strict_elementwise_abs(float4 a) { @@ -50,7 +50,7 @@ float4 strict_elementwise_min(float4 a, float4 b) { // CHECK-LABEL: define dso_local noundef <4 x float> @_Z26strict_elementwise_maximumDv4_fS_ // CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[ELT_MAXIMUM:%.*]] = tail call <4 x float> @llvm.maximum.v4f32(<4 x float> [[A]], <4 x float> [[B]]) #[[ATTR4]] +// CHECK-NEXT: [[ELT_MAXIMUM:%.*]] = tail call <4 x float> @llvm.maximum.v4f32(<4 x float> [[A]], <4 x float> [[B]]) // CHECK-NEXT: ret <4 x float> [[ELT_MAXIMUM]] // float4 strict_elementwise_maximum(float4 a, float4 b) { @@ -60,7 +60,7 @@ float4 strict_elementwise_maximum(float4 a, float4 b) { // CHECK-LABEL: define dso_local noundef <4 x float> @_Z26strict_elementwise_minimumDv4_fS_ // CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[ELT_MINIMUM:%.*]] = tail call <4 x float> @llvm.minimum.v4f32(<4 x float> [[A]], <4 x float> [[B]]) #[[ATTR4]] +// CHECK-NEXT: [[ELT_MINIMUM:%.*]] = tail call <4 x float> @llvm.minimum.v4f32(<4 x float> [[A]], <4 x float> [[B]]) // CHECK-NEXT: ret <4 x float> [[ELT_MINIMUM]] // float4 strict_elementwise_minimum(float4 a, float4 b) { @@ -70,7 +70,7 @@ float4 strict_elementwise_minimum(float4 a, float4 b) { // CHECK-LABEL: define dso_local noundef <4 x float> @_Z23strict_elementwise_ceilDv4_f // CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[ELT_CEIL:%.*]] = tail call <4 x float> @llvm.ceil.v4f32(<4 x float> [[A]]) #[[ATTR4]] +// CHECK-NEXT: [[ELT_CEIL:%.*]] = tail call <4 x float> @llvm.ceil.v4f32(<4 x float> [[A]]) // CHECK-NEXT: ret <4 x float> [[ELT_CEIL]] // float4 strict_elementwise_ceil(float4 a) { @@ -80,7 +80,7 @@ float4 strict_elementwise_ceil(float4 a) { // CHECK-LABEL: define dso_local noundef <4 x float> @_Z23strict_elementwise_acosDv4_f // CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[ELT_ACOS:%.*]] = tail call <4 x float> @llvm.acos.v4f32(<4 x float> [[A]]) #[[ATTR4]] +// CHECK-NEXT: [[ELT_ACOS:%.*]] = tail call <4 x float> @llvm.acos.v4f32(<4 x float> [[A]]) // CHECK-NEXT: ret <4 x float> [[ELT_ACOS]] // float4 strict_elementwise_acos(float4 a) { @@ -90,7 +90,7 @@ float4 strict_elementwise_acos(float4 a) { // CHECK-LABEL: define dso_local noundef <4 x float> @_Z22strict_elementwise_cosDv4_f // CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[ELT_COS:%.*]] = tail call <4 x float> @llvm.cos.v4f32(<4 x float> [[A]]) #[[ATTR4]] +// CHECK-NEXT: [[ELT_COS:%.*]] = tail call <4 x float> @llvm.cos.v4f32(<4 x float> [[A]]) // CHECK-NEXT: ret <4 x float> [[ELT_COS]] // float4 strict_elementwise_cos(float4 a) { @@ -100,7 +100,7 @@ float4 strict_elementwise_cos(float4 a) { // CHECK-LABEL: define dso_local noundef <4 x float> @_Z23strict_elementwise_coshDv4_f // CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[ELT_COSH:%.*]] = tail call <4 x float> @llvm.cosh.v4f32(<4 x float> [[A]]) #[[ATTR4]] +// CHECK-NEXT: [[ELT_COSH:%.*]] = tail call <4 x float> @llvm.cosh.v4f32(<4 x float> [[A]]) // CHECK-NEXT: ret <4 x float> [[ELT_COSH]] // float4 strict_elementwise_cosh(float4 a) { @@ -110,7 +110,7 @@ float4 strict_elementwise_cosh(float4 a) { // CHECK-LABEL: define dso_local noundef <4 x float> @_Z22strict_elementwise_expDv4_f // CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[ELT_EXP:%.*]] = tail call <4 x float> @llvm.exp.v4f32(<4 x float> [[A]]) #[[ATTR4]] +// CHECK-NEXT: [[ELT_EXP:%.*]] = tail call <4 x float> @llvm.exp.v4f32(<4 x float> [[A]]) // CHECK-NEXT: ret <4 x float> [[ELT_EXP]] // float4 strict_elementwise_exp(float4 a) { @@ -120,7 +120,7 @@ float4 strict_elementwise_exp(float4 a) { // CHECK-LABEL: define dso_local noundef <4 x float> @_Z23strict_elementwise_exp2Dv4_f // CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[ELT_EXP2:%.*]] = tail call <4 x float> @llvm.exp2.v4f32(<4 x float> [[A]]) #[[ATTR4]] +// CHECK-NEXT: [[ELT_EXP2:%.*]] = tail call <4 x float> @llvm.exp2.v4f32(<4 x float> [[A]]) // CHECK-NEXT: ret <4 x float> [[ELT_EXP2]] // float4 strict_elementwise_exp2(float4 a) { @@ -130,7 +130,7 @@ float4 strict_elementwise_exp2(float4 a) { // CHECK-LABEL: define dso_local noundef <4 x float> @_Z24strict_elementwise_floorDv4_f // CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[ELT_FLOOR:%.*]] = tail call <4 x float> @llvm.floor.v4f32(<4 x float> [[A]]) #[[ATTR4]] +// CHECK-NEXT: [[ELT_FLOOR:%.*]] = tail call <4 x float> @llvm.floor.v4f32(<4 x float> [[A]]) // CHECK-NEXT: ret <4 x float> [[ELT_FLOOR]] // float4 strict_elementwise_floor(float4 a) { @@ -140,7 +140,7 @@ float4 strict_elementwise_floor(float4 a) { // CHECK-LABEL: define dso_local noundef <4 x float> @_Z22strict_elementwise_logDv4_f // CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[ELT_LOG:%.*]] = tail call <4 x float> @llvm.log.v4f32(<4 x float> [[A]]) #[[ATTR4]] +// CHECK-NEXT: [[ELT_LOG:%.*]] = tail call <4 x float> @llvm.log.v4f32(<4 x float> [[A]]) // CHECK-NEXT: ret <4 x float> [[ELT_LOG]] // float4 strict_elementwise_log(float4 a) { @@ -150,7 +150,7 @@ float4 strict_elementwise_log(float4 a) { // CHECK-LABEL: define dso_local noundef <4 x float> @_Z23strict_elementwise_log2Dv4_f // CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[ELT_LOG2:%.*]] = tail call <4 x float> @llvm.log2.v4f32(<4 x float> [[A]]) #[[ATTR4]] +// CHECK-NEXT: [[ELT_LOG2:%.*]] = tail call <4 x float> @llvm.log2.v4f32(<4 x float> [[A]]) // CHECK-NEXT: ret <4 x float> [[ELT_LOG2]] // float4 strict_elementwise_log2(float4 a) { @@ -160,7 +160,7 @@ float4 strict_elementwise_log2(float4 a) { // CHECK-LABEL: define dso_local noundef <4 x float> @_Z24strict_elementwise_log10Dv4_f // CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[ELT_LOG2:%.*]] = tail call <4 x float> @llvm.log2.v4f32(<4 x float> [[A]]) #[[ATTR4]] +// CHECK-NEXT: [[ELT_LOG2:%.*]] = tail call <4 x float> @llvm.log2.v4f32(<4 x float> [[A]]) // CHECK-NEXT: ret <4 x float> [[ELT_LOG2]] // float4 strict_elementwise_log10(float4 a) { @@ -170,7 +170,7 @@ float4 strict_elementwise_log10(float4 a) { // CHECK-LABEL: define dso_local noundef <4 x float> @_Z28strict_elementwise_roundevenDv4_f // CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[ELT_ROUNDEVEN:%.*]] = tail call <4 x float> @llvm.roundeven.v4f32(<4 x float> [[A]]) #[[ATTR4]] +// CHECK-NEXT: [[ELT_ROUNDEVEN:%.*]] = tail call <4 x float> @llvm.roundeven.v4f32(<4 x float> [[A]]) // CHECK-NEXT: ret <4 x float> [[ELT_ROUNDEVEN]] // float4 strict_elementwise_roundeven(float4 a) { @@ -180,7 +180,7 @@ float4 strict_elementwise_roundeven(float4 a) { // CHECK-LABEL: define dso_local noundef <4 x float> @_Z24strict_elementwise_roundDv4_f // CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[ELT_ROUND:%.*]] = tail call <4 x float> @llvm.round.v4f32(<4 x float> [[A]]) #[[ATTR4]] +// CHECK-NEXT: [[ELT_ROUND:%.*]] = tail call <4 x float> @llvm.round.v4f32(<4 x float> [[A]]) // CHECK-NEXT: ret <4 x float> [[ELT_ROUND]] // float4 strict_elementwise_round(float4 a) { @@ -190,7 +190,7 @@ float4 strict_elementwise_round(float4 a) { // CHECK-LABEL: define dso_local noundef <4 x float> @_Z23strict_elementwise_rintDv4_f // CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[ELT_RINT:%.*]] = tail call <4 x float> @llvm.rint.v4f32(<4 x float> [[A]]) #[[ATTR4]] +// CHECK-NEXT: [[ELT_RINT:%.*]] = tail call <4 x float> @llvm.rint.v4f32(<4 x float> [[A]]) // CHECK-NEXT: ret <4 x float> [[ELT_RINT]] // float4 strict_elementwise_rint(float4 a) { @@ -200,7 +200,7 @@ float4 strict_elementwise_rint(float4 a) { // CHECK-LABEL: define dso_local noundef <4 x float> @_Z28strict_elementwise_nearbyintDv4_f // CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[ELT_NEARBYINT:%.*]] = tail call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[A]]) #[[ATTR4]] +// CHECK-NEXT: [[ELT_NEARBYINT:%.*]] = tail call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[A]]) // CHECK-NEXT: ret <4 x float> [[ELT_NEARBYINT]] // float4 strict_elementwise_nearbyint(float4 a) { @@ -210,7 +210,7 @@ float4 strict_elementwise_nearbyint(float4 a) { // CHECK-LABEL: define dso_local noundef <4 x float> @_Z23strict_elementwise_asinDv4_f // CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[ELT_ASIN:%.*]] = tail call <4 x float> @llvm.asin.v4f32(<4 x float> [[A]]) #[[ATTR4]] +// CHECK-NEXT: [[ELT_ASIN:%.*]] = tail call <4 x float> @llvm.asin.v4f32(<4 x float> [[A]]) // CHECK-NEXT: ret <4 x float> [[ELT_ASIN]] // float4 strict_elementwise_asin(float4 a) { @@ -220,7 +220,7 @@ float4 strict_elementwise_asin(float4 a) { // CHECK-LABEL: define dso_local noundef <4 x float> @_Z22strict_elementwise_sinDv4_f // CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[ELT_SIN:%.*]] = tail call <4 x float> @llvm.sin.v4f32(<4 x float> [[A]]) #[[ATTR4]] +// CHECK-NEXT: [[ELT_SIN:%.*]] = tail call <4 x float> @llvm.sin.v4f32(<4 x float> [[A]]) // CHECK-NEXT: ret <4 x float> [[ELT_SIN]] // float4 strict_elementwise_sin(float4 a) { @@ -230,7 +230,7 @@ float4 strict_elementwise_sin(float4 a) { // CHECK-LABEL: define dso_local noundef <4 x float> @_Z23strict_elementwise_sinhDv4_f // CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[ELT_SINH:%.*]] = tail call <4 x float> @llvm.sinh.v4f32(<4 x float> [[A]]) #[[ATTR4]] +// CHECK-NEXT: [[ELT_SINH:%.*]] = tail call <4 x float> @llvm.sinh.v4f32(<4 x float> [[A]]) // CHECK-NEXT: ret <4 x float> [[ELT_SINH]] // float4 strict_elementwise_sinh(float4 a) { @@ -250,7 +250,7 @@ float4 strict_elementwise_sqrt(float4 a) { // CHECK-LABEL: define dso_local noundef <4 x float> @_Z23strict_elementwise_atanDv4_f // CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[ELT_ATAN:%.*]] = tail call <4 x float> @llvm.atan.v4f32(<4 x float> [[A]]) #[[ATTR4]] +// CHECK-NEXT: [[ELT_ATAN:%.*]] = tail call <4 x float> @llvm.atan.v4f32(<4 x float> [[A]]) // CHECK-NEXT: ret <4 x float> [[ELT_ATAN]] // float4 strict_elementwise_atan(float4 a) { @@ -260,7 +260,7 @@ float4 strict_elementwise_atan(float4 a) { // CHECK-LABEL: define dso_local noundef <4 x float> @_Z22strict_elementwise_tanDv4_f // CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[ELT_TAN:%.*]] = tail call <4 x float> @llvm.tan.v4f32(<4 x float> [[A]]) #[[ATTR4]] +// CHECK-NEXT: [[ELT_TAN:%.*]] = tail call <4 x float> @llvm.tan.v4f32(<4 x float> [[A]]) // CHECK-NEXT: ret <4 x float> [[ELT_TAN]] // float4 strict_elementwise_tan(float4 a) { @@ -270,7 +270,7 @@ float4 strict_elementwise_tan(float4 a) { // CHECK-LABEL: define dso_local noundef <4 x float> @_Z23strict_elementwise_tanhDv4_f // CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[ELT_TANH:%.*]] = tail call <4 x float> @llvm.tanh.v4f32(<4 x float> [[A]]) #[[ATTR4]] +// CHECK-NEXT: [[ELT_TANH:%.*]] = tail call <4 x float> @llvm.tanh.v4f32(<4 x float> [[A]]) // CHECK-NEXT: ret <4 x float> [[ELT_TANH]] // float4 strict_elementwise_tanh(float4 a) { @@ -280,7 +280,7 @@ float4 strict_elementwise_tanh(float4 a) { // CHECK-LABEL: define dso_local noundef <4 x float> @_Z24strict_elementwise_atan2Dv4_fS_ // CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[ELT_ATAN2:%.*]] = tail call <4 x float> @llvm.atan2.v4f32(<4 x float> [[A]], <4 x float> [[B]]) #[[ATTR4]] +// CHECK-NEXT: [[ELT_ATAN2:%.*]] = tail call <4 x float> @llvm.atan2.v4f32(<4 x float> [[A]], <4 x float> [[B]]) // CHECK-NEXT: ret <4 x float> [[ELT_ATAN2]] // float4 strict_elementwise_atan2(float4 a, float4 b) { @@ -290,7 +290,7 @@ float4 strict_elementwise_atan2(float4 a, float4 b) { // CHECK-LABEL: define dso_local noundef <4 x float> @_Z24strict_elementwise_truncDv4_f // CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[ELT_TRUNC:%.*]] = tail call <4 x float> @llvm.trunc.v4f32(<4 x float> [[A]]) #[[ATTR4]] +// CHECK-NEXT: [[ELT_TRUNC:%.*]] = tail call <4 x float> @llvm.trunc.v4f32(<4 x float> [[A]]) // CHECK-NEXT: ret <4 x float> [[ELT_TRUNC]] // float4 strict_elementwise_trunc(float4 a) { @@ -300,7 +300,7 @@ float4 strict_elementwise_trunc(float4 a) { // CHECK-LABEL: define dso_local noundef <4 x float> @_Z31strict_elementwise_canonicalizeDv4_f // CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[ELT_CANONICALIZE:%.*]] = tail call <4 x float> @llvm.canonicalize.v4f32(<4 x float> [[A]]) #[[ATTR4]] +// CHECK-NEXT: [[ELT_CANONICALIZE:%.*]] = tail call <4 x float> @llvm.canonicalize.v4f32(<4 x float> [[A]]) // CHECK-NEXT: ret <4 x float> [[ELT_CANONICALIZE]] // float4 strict_elementwise_canonicalize(float4 a) { @@ -310,7 +310,7 @@ float4 strict_elementwise_canonicalize(float4 a) { // CHECK-LABEL: define dso_local noundef <4 x float> @_Z27strict_elementwise_copysignDv4_fS_ // CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.copysign.v4f32(<4 x float> [[A]], <4 x float> [[B]]) #[[ATTR4]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.copysign.v4f32(<4 x float> [[A]], <4 x float> [[B]]) // CHECK-NEXT: ret <4 x float> [[TMP0]] // float4 strict_elementwise_copysign(float4 a, float4 b) { @@ -320,7 +320,7 @@ float4 strict_elementwise_copysign(float4 a, float4 b) { // CHECK-LABEL: define dso_local noundef <4 x float> @_Z22strict_elementwise_fmaDv4_fS_S_ // CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]], <4 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.fma.v4f32(<4 x float> [[A]], <4 x float> [[B]], <4 x float> [[C]]) #[[ATTR4]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.fma.v4f32(<4 x float> [[A]], <4 x float> [[B]], <4 x float> [[C]]) // CHECK-NEXT: ret <4 x float> [[TMP0]] // float4 strict_elementwise_fma(float4 a, float4 b, float4 c) { @@ -330,7 +330,7 @@ float4 strict_elementwise_fma(float4 a, float4 b, float4 c) { // CHECK-LABEL: define dso_local noundef <4 x float> @_Z22strict_elementwise_powDv4_fS_ // CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.pow.v4f32(<4 x float> [[A]], <4 x float> [[B]]) #[[ATTR4]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.pow.v4f32(<4 x float> [[A]], <4 x float> [[B]]) // CHECK-NEXT: ret <4 x float> [[TMP0]] // float4 strict_elementwise_pow(float4 a, float4 b) { @@ -340,9 +340,8 @@ float4 strict_elementwise_pow(float4 a, float4 b) { // CHECK-LABEL: define dso_local noundef <4 x float> @_Z23strict_elementwise_fmodDv4_fS_ // CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.experimental.constrained.frem.v4f32(<4 x float> [[A]], <4 x float> [[B]], -// CHECK-SAME: metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] -// CHECK-NEXT: ret <4 x float> [[TMP0]] +// CHECK-NEXT: [[FMOD:%.*]] = tail call <4 x float> @llvm.experimental.constrained.frem.v4f32(<4 x float> [[A]], <4 x float> [[B]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// CHECK-NEXT: ret <4 x float> [[FMOD]] // float4 strict_elementwise_fmod(float4 a, float4 b) { return __builtin_elementwise_fmod(a, b); diff --git a/clang/test/CodeGen/strictfp_builtins.c b/clang/test/CodeGen/strictfp_builtins.c index 58815c7de4fa9..20e9f85dce52d 100644 --- a/clang/test/CodeGen/strictfp_builtins.c +++ b/clang/test/CodeGen/strictfp_builtins.c @@ -60,7 +60,7 @@ void test_fpclassify(double d) { // CHECK-NEXT: [[H_ADDR:%.*]] = alloca half, align 2 // CHECK-NEXT: store half [[H:%.*]], ptr [[H_ADDR]], align 2 // CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[H_ADDR]], align 2 -// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f16(half [[TMP0]], i32 516) #[[ATTR4]] +// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f16(half [[TMP0]], i32 516) // CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32 // CHECK-NEXT: call void @p(ptr noundef @.str.2, i32 noundef [[TMP2]]) #[[ATTR4]] // CHECK-NEXT: ret void @@ -76,7 +76,7 @@ void test_fp16_isinf(_Float16 h) { // CHECK-NEXT: [[F_ADDR:%.*]] = alloca float, align 4 // CHECK-NEXT: store float [[F:%.*]], ptr [[F_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F_ADDR]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f32(float [[TMP0]], i32 516) #[[ATTR4]] +// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f32(float [[TMP0]], i32 516) // CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32 // CHECK-NEXT: call void @p(ptr noundef @.str.3, i32 noundef [[TMP2]]) #[[ATTR4]] // CHECK-NEXT: ret void @@ -92,7 +92,7 @@ void test_float_isinf(float f) { // CHECK-NEXT: [[D_ADDR:%.*]] = alloca double, align 8 // CHECK-NEXT: store double [[D:%.*]], ptr [[D_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[D_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f64(double [[TMP0]], i32 516) #[[ATTR4]] +// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f64(double [[TMP0]], i32 516) // CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32 // CHECK-NEXT: call void @p(ptr noundef @.str.4, i32 noundef [[TMP2]]) #[[ATTR4]] // CHECK-NEXT: ret void @@ -108,7 +108,7 @@ void test_double_isinf(double d) { // CHECK-NEXT: [[H_ADDR:%.*]] = alloca half, align 2 // CHECK-NEXT: store half [[H:%.*]], ptr [[H_ADDR]], align 2 // CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[H_ADDR]], align 2 -// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f16(half [[TMP0]], i32 504) #[[ATTR4]] +// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f16(half [[TMP0]], i32 504) // CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32 // CHECK-NEXT: call void @p(ptr noundef @.str.5, i32 noundef [[TMP2]]) #[[ATTR4]] // CHECK-NEXT: ret void @@ -124,7 +124,7 @@ void test_fp16_isfinite(_Float16 h) { // CHECK-NEXT: [[F_ADDR:%.*]] = alloca float, align 4 // CHECK-NEXT: store float [[F:%.*]], ptr [[F_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F_ADDR]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f32(float [[TMP0]], i32 504) #[[ATTR4]] +// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f32(float [[TMP0]], i32 504) // CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32 // CHECK-NEXT: call void @p(ptr noundef @.str.6, i32 noundef [[TMP2]]) #[[ATTR4]] // CHECK-NEXT: ret void @@ -140,7 +140,7 @@ void test_float_isfinite(float f) { // CHECK-NEXT: [[D_ADDR:%.*]] = alloca double, align 8 // CHECK-NEXT: store double [[D:%.*]], ptr [[D_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[D_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f64(double [[TMP0]], i32 504) #[[ATTR4]] +// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f64(double [[TMP0]], i32 504) // CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32 // CHECK-NEXT: call void @p(ptr noundef @.str.7, i32 noundef [[TMP2]]) #[[ATTR4]] // CHECK-NEXT: ret void @@ -176,7 +176,7 @@ void test_isinf_sign(double d) { // CHECK-NEXT: [[H_ADDR:%.*]] = alloca half, align 2 // CHECK-NEXT: store half [[H:%.*]], ptr [[H_ADDR]], align 2 // CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[H_ADDR]], align 2 -// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f16(half [[TMP0]], i32 3) #[[ATTR4]] +// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f16(half [[TMP0]], i32 3) // CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32 // CHECK-NEXT: call void @p(ptr noundef @.str.9, i32 noundef [[TMP2]]) #[[ATTR4]] // CHECK-NEXT: ret void @@ -192,7 +192,7 @@ void test_fp16_isnan(_Float16 h) { // CHECK-NEXT: [[F_ADDR:%.*]] = alloca float, align 4 // CHECK-NEXT: store float [[F:%.*]], ptr [[F_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F_ADDR]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f32(float [[TMP0]], i32 3) #[[ATTR4]] +// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f32(float [[TMP0]], i32 3) // CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32 // CHECK-NEXT: call void @p(ptr noundef @.str.10, i32 noundef [[TMP2]]) #[[ATTR4]] // CHECK-NEXT: ret void @@ -208,7 +208,7 @@ void test_float_isnan(float f) { // CHECK-NEXT: [[D_ADDR:%.*]] = alloca double, align 8 // CHECK-NEXT: store double [[D:%.*]], ptr [[D_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[D_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f64(double [[TMP0]], i32 3) #[[ATTR4]] +// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f64(double [[TMP0]], i32 3) // CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32 // CHECK-NEXT: call void @p(ptr noundef @.str.11, i32 noundef [[TMP2]]) #[[ATTR4]] // CHECK-NEXT: ret void @@ -224,7 +224,7 @@ void test_double_isnan(double d) { // CHECK-NEXT: [[D_ADDR:%.*]] = alloca double, align 8 // CHECK-NEXT: store double [[D:%.*]], ptr [[D_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[D_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f64(double [[TMP0]], i32 264) #[[ATTR4]] +// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f64(double [[TMP0]], i32 264) // CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32 // CHECK-NEXT: call void @p(ptr noundef @.str.12, i32 noundef [[TMP2]]) #[[ATTR4]] // CHECK-NEXT: ret void diff --git a/clang/test/CodeGenOpenCL/cl20-device-side-enqueue-attributes.cl b/clang/test/CodeGenOpenCL/cl20-device-side-enqueue-attributes.cl index 451d30b4d86f0..36120b3c89d11 100644 --- a/clang/test/CodeGenOpenCL/cl20-device-side-enqueue-attributes.cl +++ b/clang/test/CodeGenOpenCL/cl20-device-side-enqueue-attributes.cl @@ -109,7 +109,7 @@ kernel void device_side_enqueue(global float *a, global float *b, int i) { // STRICTFP-NEXT: store i32 0, ptr [[FLAGS]], align 4 // STRICTFP-NEXT: [[TMP0:%.*]] = load target("spirv.Queue"), ptr [[DEFAULT_QUEUE]], align 4 // STRICTFP-NEXT: [[TMP1:%.*]] = load i32, ptr [[FLAGS]], align 4 -// STRICTFP-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP]], ptr align 4 [[NDRANGE]], i32 4, i1 false) #[[ATTR5:[0-9]+]] +// STRICTFP-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP]], ptr align 4 [[NDRANGE]], i32 4, i1 false) // STRICTFP-NEXT: [[BLOCK_SIZE:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr addrspace(4), ptr addrspace(1), i32, ptr addrspace(1) }>, ptr [[BLOCK]], i32 0, i32 0 // STRICTFP-NEXT: store i32 24, ptr [[BLOCK_SIZE]], align 4 // STRICTFP-NEXT: [[BLOCK_ALIGN:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr addrspace(4), ptr addrspace(1), i32, ptr addrspace(1) }>, ptr [[BLOCK]], i32 0, i32 1 @@ -126,7 +126,7 @@ kernel void device_side_enqueue(global float *a, global float *b, int i) { // STRICTFP-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 4 // STRICTFP-NEXT: store ptr addrspace(1) [[TMP4]], ptr [[BLOCK_CAPTURED2]], align 4 // STRICTFP-NEXT: [[TMP5:%.*]] = addrspacecast ptr [[BLOCK]] to ptr addrspace(4) -// STRICTFP-NEXT: [[TMP6:%.*]] = call spir_func i32 @__enqueue_kernel_basic(target("spirv.Queue") [[TMP0]], i32 [[TMP1]], ptr [[TMP]], ptr addrspace(4) addrspacecast (ptr @__device_side_enqueue_block_invoke_kernel to ptr addrspace(4)), ptr addrspace(4) [[TMP5]]) #[[ATTR5]] +// STRICTFP-NEXT: [[TMP6:%.*]] = call spir_func i32 @__enqueue_kernel_basic(target("spirv.Queue") [[TMP0]], i32 [[TMP1]], ptr [[TMP]], ptr addrspace(4) addrspacecast (ptr @__device_side_enqueue_block_invoke_kernel to ptr addrspace(4)), ptr addrspace(4) [[TMP5]]) // STRICTFP-NEXT: ret void // // @@ -144,7 +144,7 @@ kernel void device_side_enqueue(global float *a, global float *b, int i) { // STRICTFP-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[BLOCK_CAPTURE_ADDR1]], align 4 // STRICTFP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[TMP0]], i32 [[TMP1]] // STRICTFP-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(1) [[ARRAYIDX]], align 4 -// STRICTFP-NEXT: [[TMP3:%.*]] = call float @llvm.experimental.constrained.fmuladd.f32(float 4.000000e+00, float [[TMP2]], float 1.000000e+00, metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR5]] +// STRICTFP-NEXT: [[TMP3:%.*]] = call float @llvm.experimental.constrained.fmuladd.f32(float 4.000000e+00, float [[TMP2]], float 1.000000e+00, metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR5:[0-9]+]] // STRICTFP-NEXT: [[BLOCK_CAPTURE_ADDR2:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr addrspace(4), ptr addrspace(1), i32, ptr addrspace(1) }>, ptr addrspace(4) [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3 // STRICTFP-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[BLOCK_CAPTURE_ADDR2]], align 4 // STRICTFP-NEXT: [[BLOCK_CAPTURE_ADDR3:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr addrspace(4), ptr addrspace(1), i32, ptr addrspace(1) }>, ptr addrspace(4) [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4 @@ -158,7 +158,7 @@ kernel void device_side_enqueue(global float *a, global float *b, int i) { // STRICTFP-LABEL: define {{[^@]+}}@__device_side_enqueue_block_invoke_kernel // STRICTFP-SAME: (ptr addrspace(4) [[TMP0:%.*]]) #[[ATTR4:[0-9]+]] { // STRICTFP-NEXT: entry: -// STRICTFP-NEXT: call spir_func void @__device_side_enqueue_block_invoke(ptr addrspace(4) [[TMP0]]) #[[ATTR5]] +// STRICTFP-NEXT: call spir_func void @__device_side_enqueue_block_invoke(ptr addrspace(4) [[TMP0]]) // STRICTFP-NEXT: ret void // //. diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h index b73309175f20d..7b0b01253bfa8 100644 --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ -2443,24 +2443,13 @@ class IRBuilderBase { CallInst *CreateCall(FunctionType *FTy, Value *Callee, ArrayRef Args = {}, const Twine &Name = "", MDNode *FPMathTag = nullptr) { - CallInst *CI = CallInst::Create(FTy, Callee, Args, DefaultOperandBundles); - if (IsFPConstrained) - setConstrainedFPCallAttr(CI); - if (isa(CI)) - setFPAttrs(CI, FPMathTag, FMF); - return Insert(CI, Name); + return CreateCall(FTy, Callee, Args, DefaultOperandBundles, Name, + FPMathTag); } CallInst *CreateCall(FunctionType *FTy, Value *Callee, ArrayRef Args, ArrayRef OpBundles, - const Twine &Name = "", MDNode *FPMathTag = nullptr) { - CallInst *CI = CallInst::Create(FTy, Callee, Args, OpBundles); - if (IsFPConstrained) - setConstrainedFPCallAttr(CI); - if (isa(CI)) - setFPAttrs(CI, FPMathTag, FMF); - return Insert(CI, Name); - } + const Twine &Name = "", MDNode *FPMathTag = nullptr); CallInst *CreateCall(FunctionCallee Callee, ArrayRef Args = {}, const Twine &Name = "", MDNode *FPMathTag = nullptr) { diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h index 3436216d478e3..5fb1c9ecf44ba 100644 --- a/llvm/include/llvm/IR/IntrinsicInst.h +++ b/llvm/include/llvm/IR/IntrinsicInst.h @@ -128,6 +128,9 @@ class IntrinsicInst : public CallInst { /// course of IR transformations static bool mayLowerToFunctionCall(Intrinsic::ID IID); + /// Check if the specified intrinsic can read or write FP environment. + static bool canAccessFPEnvironment(LLVMContext &C, Intrinsic::ID IID); + /// Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const CallInst *I) { if (const Function *CF = I->getCalledFunction()) diff --git a/llvm/include/llvm/Support/ModRef.h b/llvm/include/llvm/Support/ModRef.h index 5a9d80c87ae27..d8690778abebb 100644 --- a/llvm/include/llvm/Support/ModRef.h +++ b/llvm/include/llvm/Support/ModRef.h @@ -212,6 +212,10 @@ template class MemoryEffectsBase { return getWithoutLoc(Location::InaccessibleMem).doesNotAccessMemory(); } + bool doesAccessInaccessibleMem() const { + return isModOrRefSet(getModRef(Location::InaccessibleMem)); + } + /// Whether this function only (at most) accesses argument and inaccessible /// memory. bool onlyAccessesInaccessibleOrArgMem() const { diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp index 27b499e42a4e4..4cf39af173aaa 100644 --- a/llvm/lib/IR/IRBuilder.cpp +++ b/llvm/lib/IR/IRBuilder.cpp @@ -86,6 +86,33 @@ IRBuilderBase::createCallHelper(Function *Callee, ArrayRef Ops, return CI; } +CallInst *IRBuilderBase::CreateCall(FunctionType *FTy, Value *Callee, + ArrayRef Args, + ArrayRef OpBundles, + const Twine &Name, MDNode *FPMathTag) { + bool CallAccessesFPEnv = false; + if (IsFPConstrained) { + if (const auto *Func = dyn_cast(Callee)) { + if (Intrinsic::ID ID = Func->getIntrinsicID()) { + LLVMContext &C = Func->getContext(); + CallAccessesFPEnv = IntrinsicInst::canAccessFPEnvironment(C, ID); + } + // A call to a function that has the 'StrictFP' attribute makes the call + // site 'StrictFP' also. + else if (Func->hasFnAttribute(Attribute::StrictFP)) { + CallAccessesFPEnv = true; + } + } + } + + CallInst *CI = CallInst::Create(FTy, Callee, Args, OpBundles); + if (IsFPConstrained && CallAccessesFPEnv) + setConstrainedFPCallAttr(CI); + if (isa(CI)) + setFPAttrs(CI, FPMathTag, FMF); + return Insert(CI, Name); +} + Value *IRBuilderBase::CreateVScale(Constant *Scaling, const Twine &Name) { assert(isa(Scaling) && "Expected constant integer"); if (cast(Scaling)->isZero()) diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp index ad174b1487a64..1c0cc13933039 100644 --- a/llvm/lib/IR/IntrinsicInst.cpp +++ b/llvm/lib/IR/IntrinsicInst.cpp @@ -66,6 +66,12 @@ bool IntrinsicInst::mayLowerToFunctionCall(Intrinsic::ID IID) { } } +bool IntrinsicInst::canAccessFPEnvironment(LLVMContext &C, Intrinsic::ID IID) { + AttributeList Attrs = Intrinsic::getAttributes(C, IID); + MemoryEffects ME = Attrs.getMemoryEffects(); + return ME.doesAccessInaccessibleMem(); +} + //===----------------------------------------------------------------------===// /// DbgVariableIntrinsic - This is the common base class for debug info /// intrinsics for variables. diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll index f9c359bc114ed..b955278e85571 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll @@ -819,11 +819,11 @@ define float @test_pown_fast_f32_strictfp(float %x, i32 %y) #1 { ; CHECK-LABEL: define float @test_pown_fast_f32_strictfp ; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[__FABS:%.*]] = call fast float @llvm.fabs.f32(float [[X]]) #[[ATTR0]] -; CHECK-NEXT: [[__LOG2:%.*]] = call fast float @llvm.log2.f32(float [[__FABS]]) #[[ATTR0]] -; CHECK-NEXT: [[POWNI2F:%.*]] = call fast float @llvm.experimental.constrained.sitofp.f32.i32(i32 [[Y]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR0]] -; CHECK-NEXT: [[__YLOGX:%.*]] = call fast float @llvm.experimental.constrained.fmul.f32(float [[POWNI2F]], float [[__LOG2]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR0]] -; CHECK-NEXT: [[__EXP2:%.*]] = call fast float @llvm.exp2.f32(float [[__YLOGX]]) #[[ATTR0]] +; CHECK-NEXT: [[__FABS:%.*]] = call fast float @llvm.fabs.f32(float [[X]]) +; CHECK-NEXT: [[__LOG2:%.*]] = call fast float @llvm.log2.f32(float [[__FABS]]) +; CHECK-NEXT: [[POWNI2F:%.*]] = call fast float @llvm.experimental.constrained.sitofp.f32.i32(i32 [[Y]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR5:[0-9]+]] +; CHECK-NEXT: [[__YLOGX:%.*]] = call fast float @llvm.experimental.constrained.fmul.f32(float [[POWNI2F]], float [[__LOG2]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR5:[0-9]+]] +; CHECK-NEXT: [[__EXP2:%.*]] = call fast float @llvm.exp2.f32(float [[__YLOGX]]) ; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[Y]], 31 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[X]] to i32 ; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP0]] diff --git a/llvm/test/CodeGen/AMDGPU/global_atomic_optimizer_fp_rtn.ll b/llvm/test/CodeGen/AMDGPU/global_atomic_optimizer_fp_rtn.ll index 94d9092cda2b1..663a7a296d8e4 100644 --- a/llvm/test/CodeGen/AMDGPU/global_atomic_optimizer_fp_rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/global_atomic_optimizer_fp_rtn.ll @@ -133,18 +133,18 @@ define amdgpu_ps float @global_atomic_fadd_uni_address_div_value_scope_agent_sco define amdgpu_ps float @global_atomic_fadd_uni_address_uni_value_one_as_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, float inreg %val) #1 { ; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_uni_value_one_as_scope_unsafe_strictfp( -; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7:[0-9]+]] +; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]] ; IR-ITERATIVE: 2: -; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) ; IR-ITERATIVE-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 -; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7:[0-9]+]] ; IR-ITERATIVE-NEXT: [[TMP12:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL:%.*]], float [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0 ; IR-ITERATIVE-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]] @@ -153,7 +153,7 @@ define amdgpu_ps float @global_atomic_fadd_uni_address_uni_value_one_as_scope_un ; IR-ITERATIVE-NEXT: br label [[TMP16]] ; IR-ITERATIVE: 16: ; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ] -; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP17]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP17]]) ; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL]], float [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP18]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] @@ -164,18 +164,18 @@ define amdgpu_ps float @global_atomic_fadd_uni_address_uni_value_one_as_scope_un ; IR-ITERATIVE-NEXT: ret float [[TMP24]] ; ; IR-DPP-LABEL: @global_atomic_fadd_uni_address_uni_value_one_as_scope_unsafe_strictfp( -; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8:[0-9]+]] +; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]] ; IR-DPP: 2: -; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) ; IR-DPP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 -; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8:[0-9]+]] ; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL:%.*]], float [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] ; IR-DPP-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0 ; IR-DPP-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]] @@ -184,7 +184,7 @@ define amdgpu_ps float @global_atomic_fadd_uni_address_uni_value_one_as_scope_un ; IR-DPP-NEXT: br label [[TMP16]] ; IR-DPP: 16: ; IR-DPP-NEXT: [[TMP17:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ] -; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP17]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP17]]) ; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] ; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL]], float [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] ; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP18]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] @@ -200,23 +200,23 @@ define amdgpu_ps float @global_atomic_fadd_uni_address_uni_value_one_as_scope_un define amdgpu_ps float @global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, float %val) #1 { ; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_strictfp( -; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]] ; IR-ITERATIVE: 2: -; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] ; IR-ITERATIVE: 10: ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP23:%.*]] syncscope("one-as") monotonic, align 4 ; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]] ; IR-ITERATIVE: 12: ; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi float [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ] -; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) ; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP14]], float [[TMP22:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP28:%.*]], float [[TMP14]], float [[TMP15]] ; IR-ITERATIVE-NEXT: br label [[TMP17]] @@ -227,10 +227,10 @@ define amdgpu_ps float @global_atomic_fadd_uni_address_div_value_one_as_scope_un ; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ] ; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ] ; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) ; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 -; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP20]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP22]] = call float @llvm.amdgcn.writelane.f32(float [[ACCUMULATOR]], i32 [[TMP20]], float [[OLDVALUEPHI]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP20]]) +; IR-ITERATIVE-NEXT: [[TMP22]] = call float @llvm.amdgcn.writelane.f32(float [[ACCUMULATOR]], i32 [[TMP20]], float [[OLDVALUEPHI]]) ; IR-ITERATIVE-NEXT: [[TMP23]] = call float @llvm.experimental.constrained.fadd.f32(float [[ACCUMULATOR]], float [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = shl i64 1, [[TMP19]] ; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = xor i64 [[TMP24]], -1 @@ -242,31 +242,31 @@ define amdgpu_ps float @global_atomic_fadd_uni_address_div_value_one_as_scope_un ; IR-ITERATIVE-NEXT: br i1 [[TMP28]], label [[TMP10]], label [[TMP12]] ; ; IR-DPP-LABEL: @global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_strictfp( -; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] +; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]] ; IR-DPP: 2: -; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float -0.000000e+00) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-DPP-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float -0.000000e+00) +; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP9]], float [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 274, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP11]], float [[TMP12]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP13]], i32 276, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP13]], float [[TMP14]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP15]], i32 280, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP15]], float [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP17]], i32 322, i32 10, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP17]], float [[TMP18]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP19]], i32 323, i32 12, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP19]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP21]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP23]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP21]], i32 312, i32 15, i32 15, i1 false) +; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63) +; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP23]]) ; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0 ; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]] ; IR-DPP: 26: @@ -274,8 +274,8 @@ define amdgpu_ps float @global_atomic_fadd_uni_address_div_value_one_as_scope_un ; IR-DPP-NEXT: br label [[TMP28]] ; IR-DPP: 28: ; IR-DPP-NEXT: [[TMP29:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ] -; IR-DPP-NEXT: [[TMP30:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP29]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP31:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP30:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP29]]) +; IR-DPP-NEXT: [[TMP31:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]]) ; IR-DPP-NEXT: [[TMP32:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP30]], float [[TMP31]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] ; IR-DPP-NEXT: [[TMP33:%.*]] = select i1 [[TMP25]], float [[TMP30]], float [[TMP32]] ; IR-DPP-NEXT: br label [[TMP34]] @@ -289,16 +289,16 @@ define amdgpu_ps float @global_atomic_fadd_uni_address_div_value_one_as_scope_un define amdgpu_ps float @global_atomic_fsub_uni_address_uni_value_agent_scope_strictfp(ptr addrspace(1) inreg %ptr, float inreg %val) #2 { ; IR-ITERATIVE-LABEL: @global_atomic_fsub_uni_address_uni_value_agent_scope_strictfp( -; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]] ; IR-ITERATIVE: 2: -; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) ; IR-ITERATIVE-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP12:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL:%.*]], float [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] @@ -309,7 +309,7 @@ define amdgpu_ps float @global_atomic_fsub_uni_address_uni_value_agent_scope_str ; IR-ITERATIVE-NEXT: br label [[TMP16]] ; IR-ITERATIVE: 16: ; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ] -; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP17]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP17]]) ; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL]], float [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP18]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] @@ -320,16 +320,16 @@ define amdgpu_ps float @global_atomic_fsub_uni_address_uni_value_agent_scope_str ; IR-ITERATIVE-NEXT: ret float [[TMP24]] ; ; IR-DPP-LABEL: @global_atomic_fsub_uni_address_uni_value_agent_scope_strictfp( -; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] +; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]] ; IR-DPP: 2: -; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) ; IR-DPP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 ; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] ; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL:%.*]], float [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] @@ -340,7 +340,7 @@ define amdgpu_ps float @global_atomic_fsub_uni_address_uni_value_agent_scope_str ; IR-DPP-NEXT: br label [[TMP16]] ; IR-DPP: 16: ; IR-DPP-NEXT: [[TMP17:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ] -; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP17]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP17]]) ; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] ; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL]], float [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] ; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP18]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] @@ -356,23 +356,23 @@ define amdgpu_ps float @global_atomic_fsub_uni_address_uni_value_agent_scope_str define amdgpu_ps float @global_atomic_fsub_uni_address_div_value_agent_scope_strictfp(ptr addrspace(1) inreg %ptr, float %val) #2 { ; IR-ITERATIVE-LABEL: @global_atomic_fsub_uni_address_div_value_agent_scope_strictfp( -; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]] ; IR-ITERATIVE: 2: -; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] ; IR-ITERATIVE: 10: ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[TMP23:%.*]] syncscope("agent") monotonic, align 4 ; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]] ; IR-ITERATIVE: 12: ; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi float [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ] -; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) ; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.fsub.f32(float [[TMP14]], float [[TMP22:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP28:%.*]], float [[TMP14]], float [[TMP15]] ; IR-ITERATIVE-NEXT: br label [[TMP17]] @@ -383,10 +383,10 @@ define amdgpu_ps float @global_atomic_fsub_uni_address_div_value_agent_scope_str ; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ] ; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ] ; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) ; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 -; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP20]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP22]] = call float @llvm.amdgcn.writelane.f32(float [[ACCUMULATOR]], i32 [[TMP20]], float [[OLDVALUEPHI]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP20]]) +; IR-ITERATIVE-NEXT: [[TMP22]] = call float @llvm.amdgcn.writelane.f32(float [[ACCUMULATOR]], i32 [[TMP20]], float [[OLDVALUEPHI]]) ; IR-ITERATIVE-NEXT: [[TMP23]] = call float @llvm.experimental.constrained.fadd.f32(float [[ACCUMULATOR]], float [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = shl i64 1, [[TMP19]] ; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = xor i64 [[TMP24]], -1 @@ -398,31 +398,31 @@ define amdgpu_ps float @global_atomic_fsub_uni_address_div_value_agent_scope_str ; IR-ITERATIVE-NEXT: br i1 [[TMP28]], label [[TMP10]], label [[TMP12]] ; ; IR-DPP-LABEL: @global_atomic_fsub_uni_address_div_value_agent_scope_strictfp( -; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] +; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]] ; IR-DPP: 2: -; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float -0.000000e+00) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-DPP-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float -0.000000e+00) +; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP9]], float [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 274, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP11]], float [[TMP12]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP13]], i32 276, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP13]], float [[TMP14]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP15]], i32 280, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP15]], float [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP17]], i32 322, i32 10, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP17]], float [[TMP18]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP19]], i32 323, i32 12, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP19]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP21]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP23]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP21]], i32 312, i32 15, i32 15, i1 false) +; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63) +; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP23]]) ; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0 ; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]] ; IR-DPP: 26: @@ -430,8 +430,8 @@ define amdgpu_ps float @global_atomic_fsub_uni_address_div_value_agent_scope_str ; IR-DPP-NEXT: br label [[TMP28]] ; IR-DPP: 28: ; IR-DPP-NEXT: [[TMP29:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ] -; IR-DPP-NEXT: [[TMP30:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP29]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP31:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP30:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP29]]) +; IR-DPP-NEXT: [[TMP31:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]]) ; IR-DPP-NEXT: [[TMP32:%.*]] = call float @llvm.experimental.constrained.fsub.f32(float [[TMP30]], float [[TMP31]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] ; IR-DPP-NEXT: [[TMP33:%.*]] = select i1 [[TMP25]], float [[TMP30]], float [[TMP32]] ; IR-DPP-NEXT: br label [[TMP34]] @@ -566,15 +566,15 @@ define amdgpu_ps float @global_atomic_fmin_uni_address_div_value_agent_scope_uns define amdgpu_ps float @global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, float inreg %val) #1{ ; IR-ITERATIVE-LABEL: @global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe_strictfp( -; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP19:%.*]] ; IR-ITERATIVE: 2: -; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) ; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 ; IR-ITERATIVE-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]] ; IR-ITERATIVE: 10: @@ -582,7 +582,7 @@ define amdgpu_ps float @global_atomic_fmax_uni_address_uni_value_agent_scope_uns ; IR-ITERATIVE-NEXT: br label [[TMP12]] ; IR-ITERATIVE: 12: ; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP11]], [[TMP10]] ] -; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) ; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP9]], float 0x7FF8000000000000, float [[VAL]] ; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP14]], float [[TMP16]], metadata !"fpexcept.strict") #[[ATTR7]] @@ -593,15 +593,15 @@ define amdgpu_ps float @global_atomic_fmax_uni_address_uni_value_agent_scope_uns ; IR-ITERATIVE-NEXT: ret float [[TMP20]] ; ; IR-DPP-LABEL: @global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe_strictfp( -; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] +; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP19:%.*]] ; IR-DPP: 2: -; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) ; IR-DPP-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 ; IR-DPP-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]] ; IR-DPP: 10: @@ -609,7 +609,7 @@ define amdgpu_ps float @global_atomic_fmax_uni_address_uni_value_agent_scope_uns ; IR-DPP-NEXT: br label [[TMP12]] ; IR-DPP: 12: ; IR-DPP-NEXT: [[TMP13:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP11]], [[TMP10]] ] -; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) ; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] ; IR-DPP-NEXT: [[TMP16:%.*]] = select i1 [[TMP9]], float 0x7FF8000000000000, float [[VAL]] ; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP14]], float [[TMP16]], metadata !"fpexcept.strict") #[[ATTR8]] @@ -625,23 +625,23 @@ define amdgpu_ps float @global_atomic_fmax_uni_address_uni_value_agent_scope_uns define amdgpu_ps float @global_atomic_fmax_uni_address_div_value_agent_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, float %val) #1{ ; IR-ITERATIVE-LABEL: @global_atomic_fmax_uni_address_div_value_agent_scope_unsafe_strictfp( -; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]] ; IR-ITERATIVE: 2: -; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] ; IR-ITERATIVE: 10: ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[TMP23:%.*]] syncscope("agent") monotonic, align 4 ; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]] ; IR-ITERATIVE: 12: ; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi float [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ] -; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) ; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP14]], float [[TMP22:%.*]], metadata !"fpexcept.strict") #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP28:%.*]], float [[TMP14]], float [[TMP15]] ; IR-ITERATIVE-NEXT: br label [[TMP17]] @@ -652,10 +652,10 @@ define amdgpu_ps float @global_atomic_fmax_uni_address_div_value_agent_scope_uns ; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ 0x7FF8000000000000, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ] ; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ] ; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) ; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 -; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP20]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP22]] = call float @llvm.amdgcn.writelane.f32(float [[ACCUMULATOR]], i32 [[TMP20]], float [[OLDVALUEPHI]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP20]]) +; IR-ITERATIVE-NEXT: [[TMP22]] = call float @llvm.amdgcn.writelane.f32(float [[ACCUMULATOR]], i32 [[TMP20]], float [[OLDVALUEPHI]]) ; IR-ITERATIVE-NEXT: [[TMP23]] = call float @llvm.experimental.constrained.maxnum.f32(float [[ACCUMULATOR]], float [[TMP21]], metadata !"fpexcept.strict") #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = shl i64 1, [[TMP19]] ; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = xor i64 [[TMP24]], -1 @@ -667,31 +667,31 @@ define amdgpu_ps float @global_atomic_fmax_uni_address_div_value_agent_scope_uns ; IR-ITERATIVE-NEXT: br i1 [[TMP28]], label [[TMP10]], label [[TMP12]] ; ; IR-DPP-LABEL: @global_atomic_fmax_uni_address_div_value_agent_scope_unsafe_strictfp( -; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] +; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]] ; IR-DPP: 2: -; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float 0x7FF8000000000000) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-DPP-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float 0x7FF8000000000000) +; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP9]], float [[TMP10]], metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP11]], i32 274, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP11]], float [[TMP12]], metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP13]], i32 276, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP13]], float [[TMP14]], metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP15]], i32 280, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP15]], float [[TMP16]], metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP17]], i32 322, i32 10, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP17]], float [[TMP18]], metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP19]], i32 323, i32 12, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP19]], float [[TMP20]], metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP21]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP23]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP21]], i32 312, i32 15, i32 15, i1 false) +; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63) +; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP23]]) ; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0 ; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]] ; IR-DPP: 26: @@ -699,8 +699,8 @@ define amdgpu_ps float @global_atomic_fmax_uni_address_div_value_agent_scope_uns ; IR-DPP-NEXT: br label [[TMP28]] ; IR-DPP: 28: ; IR-DPP-NEXT: [[TMP29:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ] -; IR-DPP-NEXT: [[TMP30:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP29]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP31:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP30:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP29]]) +; IR-DPP-NEXT: [[TMP31:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]]) ; IR-DPP-NEXT: [[TMP32:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP30]], float [[TMP31]], metadata !"fpexcept.strict") #[[ATTR8]] ; IR-DPP-NEXT: [[TMP33:%.*]] = select i1 [[TMP25]], float [[TMP30]], float [[TMP32]] ; IR-DPP-NEXT: br label [[TMP34]] @@ -714,16 +714,16 @@ define amdgpu_ps float @global_atomic_fmax_uni_address_div_value_agent_scope_uns define amdgpu_ps float @global_atomic_fadd_uni_address_uni_value_system_scope_strictfp(ptr addrspace(1) inreg %ptr, float inreg %val) #2 { ; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_uni_value_system_scope_strictfp( -; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]] ; IR-ITERATIVE: 2: -; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) ; IR-ITERATIVE-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP12:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL:%.*]], float [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] @@ -734,7 +734,7 @@ define amdgpu_ps float @global_atomic_fadd_uni_address_uni_value_system_scope_st ; IR-ITERATIVE-NEXT: br label [[TMP16]] ; IR-ITERATIVE: 16: ; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ] -; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP17]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP17]]) ; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL]], float [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP18]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] @@ -745,16 +745,16 @@ define amdgpu_ps float @global_atomic_fadd_uni_address_uni_value_system_scope_st ; IR-ITERATIVE-NEXT: ret float [[TMP24]] ; ; IR-DPP-LABEL: @global_atomic_fadd_uni_address_uni_value_system_scope_strictfp( -; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] +; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]] ; IR-DPP: 2: -; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) ; IR-DPP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 ; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] ; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL:%.*]], float [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] @@ -765,7 +765,7 @@ define amdgpu_ps float @global_atomic_fadd_uni_address_uni_value_system_scope_st ; IR-DPP-NEXT: br label [[TMP16]] ; IR-DPP: 16: ; IR-DPP-NEXT: [[TMP17:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ] -; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP17]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP17]]) ; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] ; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL]], float [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] ; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP18]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] @@ -781,23 +781,23 @@ define amdgpu_ps float @global_atomic_fadd_uni_address_uni_value_system_scope_st define amdgpu_ps float @global_atomic_fadd_uni_address_div_value_system_scope_strictfp(ptr addrspace(1) inreg %ptr, float %val) #2 { ; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_div_value_system_scope_strictfp( -; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]] ; IR-ITERATIVE: 2: -; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] ; IR-ITERATIVE: 10: ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP23:%.*]] monotonic, align 4 ; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]] ; IR-ITERATIVE: 12: ; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi float [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ] -; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) ; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP14]], float [[TMP22:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP28:%.*]], float [[TMP14]], float [[TMP15]] ; IR-ITERATIVE-NEXT: br label [[TMP17]] @@ -808,10 +808,10 @@ define amdgpu_ps float @global_atomic_fadd_uni_address_div_value_system_scope_st ; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ] ; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ] ; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) ; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 -; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP20]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP22]] = call float @llvm.amdgcn.writelane.f32(float [[ACCUMULATOR]], i32 [[TMP20]], float [[OLDVALUEPHI]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP20]]) +; IR-ITERATIVE-NEXT: [[TMP22]] = call float @llvm.amdgcn.writelane.f32(float [[ACCUMULATOR]], i32 [[TMP20]], float [[OLDVALUEPHI]]) ; IR-ITERATIVE-NEXT: [[TMP23]] = call float @llvm.experimental.constrained.fadd.f32(float [[ACCUMULATOR]], float [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = shl i64 1, [[TMP19]] ; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = xor i64 [[TMP24]], -1 @@ -823,31 +823,31 @@ define amdgpu_ps float @global_atomic_fadd_uni_address_div_value_system_scope_st ; IR-ITERATIVE-NEXT: br i1 [[TMP28]], label [[TMP10]], label [[TMP12]] ; ; IR-DPP-LABEL: @global_atomic_fadd_uni_address_div_value_system_scope_strictfp( -; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] +; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]] ; IR-DPP: 2: -; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float -0.000000e+00) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-DPP-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float -0.000000e+00) +; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP9]], float [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 274, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP11]], float [[TMP12]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP13]], i32 276, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP13]], float [[TMP14]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP15]], i32 280, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP15]], float [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP17]], i32 322, i32 10, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP17]], float [[TMP18]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP19]], i32 323, i32 12, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP19]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP21]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP23]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP21]], i32 312, i32 15, i32 15, i1 false) +; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63) +; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP23]]) ; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0 ; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]] ; IR-DPP: 26: @@ -855,8 +855,8 @@ define amdgpu_ps float @global_atomic_fadd_uni_address_div_value_system_scope_st ; IR-DPP-NEXT: br label [[TMP28]] ; IR-DPP: 28: ; IR-DPP-NEXT: [[TMP29:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ] -; IR-DPP-NEXT: [[TMP30:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP29]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP31:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP30:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP29]]) +; IR-DPP-NEXT: [[TMP31:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]]) ; IR-DPP-NEXT: [[TMP32:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP30]], float [[TMP31]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] ; IR-DPP-NEXT: [[TMP33:%.*]] = select i1 [[TMP25]], float [[TMP30]], float [[TMP32]] ; IR-DPP-NEXT: br label [[TMP34]] @@ -1103,16 +1103,16 @@ define amdgpu_ps double @global_atomic_fadd_double_uni_address_div_value_scope_a define amdgpu_ps double @global_atomic_fadd_double_uni_address_uni_value_one_as_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, double inreg %val) #1 { ; IR-ITERATIVE-LABEL: @global_atomic_fadd_double_uni_address_uni_value_one_as_scope_unsafe_strictfp( -; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]] ; IR-ITERATIVE: 2: -; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) ; IR-ITERATIVE-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP12:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL:%.*]], double [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] @@ -1123,7 +1123,7 @@ define amdgpu_ps double @global_atomic_fadd_double_uni_address_uni_value_one_as_ ; IR-ITERATIVE-NEXT: br label [[TMP16]] ; IR-ITERATIVE: 16: ; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ] -; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP17]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP17]]) ; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL]], double [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP18]], double [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] @@ -1134,16 +1134,16 @@ define amdgpu_ps double @global_atomic_fadd_double_uni_address_uni_value_one_as_ ; IR-ITERATIVE-NEXT: ret double [[TMP24]] ; ; IR-DPP-LABEL: @global_atomic_fadd_double_uni_address_uni_value_one_as_scope_unsafe_strictfp( -; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] +; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]] ; IR-DPP: 2: -; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) ; IR-DPP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 ; IR-DPP-NEXT: [[TMP11:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] ; IR-DPP-NEXT: [[TMP12:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL:%.*]], double [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] @@ -1154,7 +1154,7 @@ define amdgpu_ps double @global_atomic_fadd_double_uni_address_uni_value_one_as_ ; IR-DPP-NEXT: br label [[TMP16]] ; IR-DPP: 16: ; IR-DPP-NEXT: [[TMP17:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ] -; IR-DPP-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP17]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP17]]) ; IR-DPP-NEXT: [[TMP19:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] ; IR-DPP-NEXT: [[TMP20:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL]], double [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] ; IR-DPP-NEXT: [[TMP21:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP18]], double [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] @@ -1170,23 +1170,23 @@ define amdgpu_ps double @global_atomic_fadd_double_uni_address_uni_value_one_as_ define amdgpu_ps double @global_atomic_fadd_double_uni_address_div_value_one_as_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, double %val) #1 { ; IR-ITERATIVE-LABEL: @global_atomic_fadd_double_uni_address_div_value_one_as_scope_unsafe_strictfp( -; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]] ; IR-ITERATIVE: 2: -; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] ; IR-ITERATIVE: 10: ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP23:%.*]] syncscope("one-as") monotonic, align 8 ; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]] ; IR-ITERATIVE: 12: ; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi double [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ] -; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP13]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP13]]) ; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP14]], double [[TMP22:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP28:%.*]], double [[TMP14]], double [[TMP15]] ; IR-ITERATIVE-NEXT: br label [[TMP17]] @@ -1197,10 +1197,10 @@ define amdgpu_ps double @global_atomic_fadd_double_uni_address_div_value_one_as_ ; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi double [ -0.000000e+00, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ] ; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ] ; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) ; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 -; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[VAL:%.*]], i32 [[TMP20]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP22]] = call double @llvm.amdgcn.writelane.f64(double [[ACCUMULATOR]], i32 [[TMP20]], double [[OLDVALUEPHI]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[VAL:%.*]], i32 [[TMP20]]) +; IR-ITERATIVE-NEXT: [[TMP22]] = call double @llvm.amdgcn.writelane.f64(double [[ACCUMULATOR]], i32 [[TMP20]], double [[OLDVALUEPHI]]) ; IR-ITERATIVE-NEXT: [[TMP23]] = call double @llvm.experimental.constrained.fadd.f64(double [[ACCUMULATOR]], double [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = shl i64 1, [[TMP19]] ; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = xor i64 [[TMP24]], -1 @@ -1212,31 +1212,31 @@ define amdgpu_ps double @global_atomic_fadd_double_uni_address_div_value_one_as_ ; IR-ITERATIVE-NEXT: br i1 [[TMP28]], label [[TMP10]], label [[TMP12]] ; ; IR-DPP-LABEL: @global_atomic_fadd_double_uni_address_div_value_one_as_scope_unsafe_strictfp( -; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] +; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]] ; IR-DPP: 2: -; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP9:%.*]] = call double @llvm.amdgcn.set.inactive.f64(double [[VAL:%.*]], double -0.000000e+00) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP10:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-DPP-NEXT: [[TMP9:%.*]] = call double @llvm.amdgcn.set.inactive.f64(double [[VAL:%.*]], double -0.000000e+00) +; IR-DPP-NEXT: [[TMP10:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP9]], i32 273, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP11:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP9]], double [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP12:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP12:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP11]], i32 274, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP13:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP11]], double [[TMP12]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP13]], i32 276, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP15:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP13]], double [[TMP14]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP16:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP16:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP15]], i32 280, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP17:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP15]], double [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP17]], i32 322, i32 10, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP19:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP17]], double [[TMP18]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP20:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP20:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP19]], i32 323, i32 12, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP21:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP19]], double [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP22:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP21]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP23:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[TMP21]], i32 63) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP24:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP23]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP22:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP21]], i32 312, i32 15, i32 15, i1 false) +; IR-DPP-NEXT: [[TMP23:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[TMP21]], i32 63) +; IR-DPP-NEXT: [[TMP24:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP23]]) ; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0 ; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]] ; IR-DPP: 26: @@ -1244,8 +1244,8 @@ define amdgpu_ps double @global_atomic_fadd_double_uni_address_div_value_one_as_ ; IR-DPP-NEXT: br label [[TMP28]] ; IR-DPP: 28: ; IR-DPP-NEXT: [[TMP29:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ] -; IR-DPP-NEXT: [[TMP30:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP29]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP31:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP22]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP30:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP29]]) +; IR-DPP-NEXT: [[TMP31:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP22]]) ; IR-DPP-NEXT: [[TMP32:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP30]], double [[TMP31]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] ; IR-DPP-NEXT: [[TMP33:%.*]] = select i1 [[TMP25]], double [[TMP30]], double [[TMP32]] ; IR-DPP-NEXT: br label [[TMP34]] @@ -1259,16 +1259,16 @@ define amdgpu_ps double @global_atomic_fadd_double_uni_address_div_value_one_as_ define amdgpu_ps double @global_atomic_fsub_double_uni_address_uni_value_agent_scope_strictfp(ptr addrspace(1) inreg %ptr, double inreg %val) #2 { ; IR-ITERATIVE-LABEL: @global_atomic_fsub_double_uni_address_uni_value_agent_scope_strictfp( -; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]] ; IR-ITERATIVE: 2: -; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) ; IR-ITERATIVE-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP12:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL:%.*]], double [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] @@ -1279,7 +1279,7 @@ define amdgpu_ps double @global_atomic_fsub_double_uni_address_uni_value_agent_s ; IR-ITERATIVE-NEXT: br label [[TMP16]] ; IR-ITERATIVE: 16: ; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ] -; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP17]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP17]]) ; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL]], double [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP18]], double [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] @@ -1290,16 +1290,16 @@ define amdgpu_ps double @global_atomic_fsub_double_uni_address_uni_value_agent_s ; IR-ITERATIVE-NEXT: ret double [[TMP24]] ; ; IR-DPP-LABEL: @global_atomic_fsub_double_uni_address_uni_value_agent_scope_strictfp( -; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] +; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]] ; IR-DPP: 2: -; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) ; IR-DPP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 ; IR-DPP-NEXT: [[TMP11:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] ; IR-DPP-NEXT: [[TMP12:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL:%.*]], double [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] @@ -1310,7 +1310,7 @@ define amdgpu_ps double @global_atomic_fsub_double_uni_address_uni_value_agent_s ; IR-DPP-NEXT: br label [[TMP16]] ; IR-DPP: 16: ; IR-DPP-NEXT: [[TMP17:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ] -; IR-DPP-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP17]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP17]]) ; IR-DPP-NEXT: [[TMP19:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] ; IR-DPP-NEXT: [[TMP20:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL]], double [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] ; IR-DPP-NEXT: [[TMP21:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP18]], double [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] @@ -1326,23 +1326,23 @@ define amdgpu_ps double @global_atomic_fsub_double_uni_address_uni_value_agent_s define amdgpu_ps double @global_atomic_fsub_double_uni_address_div_value_agent_scope_strictfp(ptr addrspace(1) inreg %ptr, double %val) #2 { ; IR-ITERATIVE-LABEL: @global_atomic_fsub_double_uni_address_div_value_agent_scope_strictfp( -; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]] ; IR-ITERATIVE: 2: -; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] ; IR-ITERATIVE: 10: ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], double [[TMP23:%.*]] syncscope("agent") monotonic, align 8 ; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]] ; IR-ITERATIVE: 12: ; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi double [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ] -; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP13]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP13]]) ; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call double @llvm.experimental.constrained.fsub.f64(double [[TMP14]], double [[TMP22:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP28:%.*]], double [[TMP14]], double [[TMP15]] ; IR-ITERATIVE-NEXT: br label [[TMP17]] @@ -1353,10 +1353,10 @@ define amdgpu_ps double @global_atomic_fsub_double_uni_address_div_value_agent_s ; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi double [ -0.000000e+00, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ] ; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ] ; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) ; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 -; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[VAL:%.*]], i32 [[TMP20]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP22]] = call double @llvm.amdgcn.writelane.f64(double [[ACCUMULATOR]], i32 [[TMP20]], double [[OLDVALUEPHI]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[VAL:%.*]], i32 [[TMP20]]) +; IR-ITERATIVE-NEXT: [[TMP22]] = call double @llvm.amdgcn.writelane.f64(double [[ACCUMULATOR]], i32 [[TMP20]], double [[OLDVALUEPHI]]) ; IR-ITERATIVE-NEXT: [[TMP23]] = call double @llvm.experimental.constrained.fadd.f64(double [[ACCUMULATOR]], double [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = shl i64 1, [[TMP19]] ; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = xor i64 [[TMP24]], -1 @@ -1368,31 +1368,31 @@ define amdgpu_ps double @global_atomic_fsub_double_uni_address_div_value_agent_s ; IR-ITERATIVE-NEXT: br i1 [[TMP28]], label [[TMP10]], label [[TMP12]] ; ; IR-DPP-LABEL: @global_atomic_fsub_double_uni_address_div_value_agent_scope_strictfp( -; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] +; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]] ; IR-DPP: 2: -; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP9:%.*]] = call double @llvm.amdgcn.set.inactive.f64(double [[VAL:%.*]], double -0.000000e+00) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP10:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-DPP-NEXT: [[TMP9:%.*]] = call double @llvm.amdgcn.set.inactive.f64(double [[VAL:%.*]], double -0.000000e+00) +; IR-DPP-NEXT: [[TMP10:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP9]], i32 273, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP11:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP9]], double [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP12:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP12:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP11]], i32 274, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP13:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP11]], double [[TMP12]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP13]], i32 276, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP15:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP13]], double [[TMP14]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP16:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP16:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP15]], i32 280, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP17:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP15]], double [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP17]], i32 322, i32 10, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP19:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP17]], double [[TMP18]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP20:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP20:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP19]], i32 323, i32 12, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP21:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP19]], double [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP22:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP21]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP23:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[TMP21]], i32 63) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP24:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP23]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP22:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP21]], i32 312, i32 15, i32 15, i1 false) +; IR-DPP-NEXT: [[TMP23:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[TMP21]], i32 63) +; IR-DPP-NEXT: [[TMP24:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP23]]) ; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0 ; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]] ; IR-DPP: 26: @@ -1400,8 +1400,8 @@ define amdgpu_ps double @global_atomic_fsub_double_uni_address_div_value_agent_s ; IR-DPP-NEXT: br label [[TMP28]] ; IR-DPP: 28: ; IR-DPP-NEXT: [[TMP29:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ] -; IR-DPP-NEXT: [[TMP30:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP29]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP31:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP22]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP30:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP29]]) +; IR-DPP-NEXT: [[TMP31:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP22]]) ; IR-DPP-NEXT: [[TMP32:%.*]] = call double @llvm.experimental.constrained.fsub.f64(double [[TMP30]], double [[TMP31]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] ; IR-DPP-NEXT: [[TMP33:%.*]] = select i1 [[TMP25]], double [[TMP30]], double [[TMP32]] ; IR-DPP-NEXT: br label [[TMP34]] @@ -1536,15 +1536,15 @@ define amdgpu_ps double @global_atomic_fmin_double_uni_address_div_value_agent_s define amdgpu_ps double @global_atomic__fmax_double_uni_address_uni_value_agent_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, double inreg %val) #1{ ; IR-ITERATIVE-LABEL: @global_atomic__fmax_double_uni_address_uni_value_agent_scope_unsafe_strictfp( -; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP19:%.*]] ; IR-ITERATIVE: 2: -; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) ; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 ; IR-ITERATIVE-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]] ; IR-ITERATIVE: 10: @@ -1552,7 +1552,7 @@ define amdgpu_ps double @global_atomic__fmax_double_uni_address_uni_value_agent_ ; IR-ITERATIVE-NEXT: br label [[TMP12]] ; IR-ITERATIVE: 12: ; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP11]], [[TMP10]] ] -; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP13]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP13]]) ; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP9]], double 0x7FF8000000000000, double [[VAL]] ; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP14]], double [[TMP16]], metadata !"fpexcept.strict") #[[ATTR7]] @@ -1563,15 +1563,15 @@ define amdgpu_ps double @global_atomic__fmax_double_uni_address_uni_value_agent_ ; IR-ITERATIVE-NEXT: ret double [[TMP20]] ; ; IR-DPP-LABEL: @global_atomic__fmax_double_uni_address_uni_value_agent_scope_unsafe_strictfp( -; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] +; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP19:%.*]] ; IR-DPP: 2: -; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) ; IR-DPP-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 ; IR-DPP-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]] ; IR-DPP: 10: @@ -1579,7 +1579,7 @@ define amdgpu_ps double @global_atomic__fmax_double_uni_address_uni_value_agent_ ; IR-DPP-NEXT: br label [[TMP12]] ; IR-DPP: 12: ; IR-DPP-NEXT: [[TMP13:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP11]], [[TMP10]] ] -; IR-DPP-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP13]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP13]]) ; IR-DPP-NEXT: [[TMP15:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] ; IR-DPP-NEXT: [[TMP16:%.*]] = select i1 [[TMP9]], double 0x7FF8000000000000, double [[VAL]] ; IR-DPP-NEXT: [[TMP17:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP14]], double [[TMP16]], metadata !"fpexcept.strict") #[[ATTR8]] @@ -1595,23 +1595,23 @@ define amdgpu_ps double @global_atomic__fmax_double_uni_address_uni_value_agent_ define amdgpu_ps double @global_atomic__fmax_double_uni_address_div_value_agent_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, double %val) #1{ ; IR-ITERATIVE-LABEL: @global_atomic__fmax_double_uni_address_div_value_agent_scope_unsafe_strictfp( -; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]] ; IR-ITERATIVE: 2: -; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] ; IR-ITERATIVE: 10: ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], double [[TMP23:%.*]] syncscope("agent") monotonic, align 8 ; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]] ; IR-ITERATIVE: 12: ; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi double [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ] -; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP13]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP13]]) ; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP14]], double [[TMP22:%.*]], metadata !"fpexcept.strict") #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP28:%.*]], double [[TMP14]], double [[TMP15]] ; IR-ITERATIVE-NEXT: br label [[TMP17]] @@ -1622,10 +1622,10 @@ define amdgpu_ps double @global_atomic__fmax_double_uni_address_div_value_agent_ ; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi double [ 0x7FF8000000000000, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ] ; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ] ; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) ; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 -; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[VAL:%.*]], i32 [[TMP20]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP22]] = call double @llvm.amdgcn.writelane.f64(double [[ACCUMULATOR]], i32 [[TMP20]], double [[OLDVALUEPHI]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[VAL:%.*]], i32 [[TMP20]]) +; IR-ITERATIVE-NEXT: [[TMP22]] = call double @llvm.amdgcn.writelane.f64(double [[ACCUMULATOR]], i32 [[TMP20]], double [[OLDVALUEPHI]]) ; IR-ITERATIVE-NEXT: [[TMP23]] = call double @llvm.experimental.constrained.maxnum.f64(double [[ACCUMULATOR]], double [[TMP21]], metadata !"fpexcept.strict") #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = shl i64 1, [[TMP19]] ; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = xor i64 [[TMP24]], -1 @@ -1637,31 +1637,31 @@ define amdgpu_ps double @global_atomic__fmax_double_uni_address_div_value_agent_ ; IR-ITERATIVE-NEXT: br i1 [[TMP28]], label [[TMP10]], label [[TMP12]] ; ; IR-DPP-LABEL: @global_atomic__fmax_double_uni_address_div_value_agent_scope_unsafe_strictfp( -; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] +; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]] ; IR-DPP: 2: -; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP9:%.*]] = call double @llvm.amdgcn.set.inactive.f64(double [[VAL:%.*]], double 0x7FF8000000000000) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP10:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-DPP-NEXT: [[TMP9:%.*]] = call double @llvm.amdgcn.set.inactive.f64(double [[VAL:%.*]], double 0x7FF8000000000000) +; IR-DPP-NEXT: [[TMP10:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP9]], i32 273, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP11:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP9]], double [[TMP10]], metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP12:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP12:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP11]], i32 274, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP13:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP11]], double [[TMP12]], metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP13]], i32 276, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP15:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP13]], double [[TMP14]], metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP16:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP16:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP15]], i32 280, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP17:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP15]], double [[TMP16]], metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP17]], i32 322, i32 10, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP19:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP17]], double [[TMP18]], metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP20:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP20:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP19]], i32 323, i32 12, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP21:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP19]], double [[TMP20]], metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP22:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP21]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP23:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[TMP21]], i32 63) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP24:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP23]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP22:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP21]], i32 312, i32 15, i32 15, i1 false) +; IR-DPP-NEXT: [[TMP23:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[TMP21]], i32 63) +; IR-DPP-NEXT: [[TMP24:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP23]]) ; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0 ; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]] ; IR-DPP: 26: @@ -1669,8 +1669,8 @@ define amdgpu_ps double @global_atomic__fmax_double_uni_address_div_value_agent_ ; IR-DPP-NEXT: br label [[TMP28]] ; IR-DPP: 28: ; IR-DPP-NEXT: [[TMP29:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ] -; IR-DPP-NEXT: [[TMP30:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP29]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP31:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP22]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP30:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP29]]) +; IR-DPP-NEXT: [[TMP31:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP22]]) ; IR-DPP-NEXT: [[TMP32:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP30]], double [[TMP31]], metadata !"fpexcept.strict") #[[ATTR8]] ; IR-DPP-NEXT: [[TMP33:%.*]] = select i1 [[TMP25]], double [[TMP30]], double [[TMP32]] ; IR-DPP-NEXT: br label [[TMP34]] @@ -1684,16 +1684,16 @@ define amdgpu_ps double @global_atomic__fmax_double_uni_address_div_value_agent_ define amdgpu_ps double @global_atomic_fadd_double_uni_address_uni_value_system_scope_strictfp(ptr addrspace(1) inreg %ptr, double inreg %val) #2 { ; IR-ITERATIVE-LABEL: @global_atomic_fadd_double_uni_address_uni_value_system_scope_strictfp( -; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]] ; IR-ITERATIVE: 2: -; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) ; IR-ITERATIVE-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP12:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL:%.*]], double [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] @@ -1704,7 +1704,7 @@ define amdgpu_ps double @global_atomic_fadd_double_uni_address_uni_value_system_ ; IR-ITERATIVE-NEXT: br label [[TMP16]] ; IR-ITERATIVE: 16: ; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ] -; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP17]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP17]]) ; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL]], double [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP18]], double [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] @@ -1715,16 +1715,16 @@ define amdgpu_ps double @global_atomic_fadd_double_uni_address_uni_value_system_ ; IR-ITERATIVE-NEXT: ret double [[TMP24]] ; ; IR-DPP-LABEL: @global_atomic_fadd_double_uni_address_uni_value_system_scope_strictfp( -; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] +; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]] ; IR-DPP: 2: -; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) ; IR-DPP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 ; IR-DPP-NEXT: [[TMP11:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] ; IR-DPP-NEXT: [[TMP12:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL:%.*]], double [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] @@ -1735,7 +1735,7 @@ define amdgpu_ps double @global_atomic_fadd_double_uni_address_uni_value_system_ ; IR-DPP-NEXT: br label [[TMP16]] ; IR-DPP: 16: ; IR-DPP-NEXT: [[TMP17:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ] -; IR-DPP-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP17]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP17]]) ; IR-DPP-NEXT: [[TMP19:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] ; IR-DPP-NEXT: [[TMP20:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL]], double [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] ; IR-DPP-NEXT: [[TMP21:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP18]], double [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] @@ -1751,23 +1751,23 @@ define amdgpu_ps double @global_atomic_fadd_double_uni_address_uni_value_system_ define amdgpu_ps double @global_atomic_fadd_double_uni_address_div_value_system_scope_strictfp(ptr addrspace(1) inreg %ptr, double %val) #2 { ; IR-ITERATIVE-LABEL: @global_atomic_fadd_double_uni_address_div_value_system_scope_strictfp( -; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]] ; IR-ITERATIVE: 2: -; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] ; IR-ITERATIVE: 10: ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP23:%.*]] monotonic, align 4 ; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]] ; IR-ITERATIVE: 12: ; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi double [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ] -; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP13]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP13]]) ; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP14]], double [[TMP22:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP28:%.*]], double [[TMP14]], double [[TMP15]] ; IR-ITERATIVE-NEXT: br label [[TMP17]] @@ -1778,10 +1778,10 @@ define amdgpu_ps double @global_atomic_fadd_double_uni_address_div_value_system_ ; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi double [ -0.000000e+00, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ] ; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ] ; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) ; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 -; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[VAL:%.*]], i32 [[TMP20]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP22]] = call double @llvm.amdgcn.writelane.f64(double [[ACCUMULATOR]], i32 [[TMP20]], double [[OLDVALUEPHI]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[VAL:%.*]], i32 [[TMP20]]) +; IR-ITERATIVE-NEXT: [[TMP22]] = call double @llvm.amdgcn.writelane.f64(double [[ACCUMULATOR]], i32 [[TMP20]], double [[OLDVALUEPHI]]) ; IR-ITERATIVE-NEXT: [[TMP23]] = call double @llvm.experimental.constrained.fadd.f64(double [[ACCUMULATOR]], double [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = shl i64 1, [[TMP19]] ; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = xor i64 [[TMP24]], -1 @@ -1793,31 +1793,31 @@ define amdgpu_ps double @global_atomic_fadd_double_uni_address_div_value_system_ ; IR-ITERATIVE-NEXT: br i1 [[TMP28]], label [[TMP10]], label [[TMP12]] ; ; IR-DPP-LABEL: @global_atomic_fadd_double_uni_address_div_value_system_scope_strictfp( -; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] +; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]] ; IR-DPP: 2: -; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP9:%.*]] = call double @llvm.amdgcn.set.inactive.f64(double [[VAL:%.*]], double -0.000000e+00) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP10:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-DPP-NEXT: [[TMP9:%.*]] = call double @llvm.amdgcn.set.inactive.f64(double [[VAL:%.*]], double -0.000000e+00) +; IR-DPP-NEXT: [[TMP10:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP9]], i32 273, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP11:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP9]], double [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP12:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP12:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP11]], i32 274, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP13:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP11]], double [[TMP12]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP13]], i32 276, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP15:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP13]], double [[TMP14]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP16:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP16:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP15]], i32 280, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP17:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP15]], double [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP17]], i32 322, i32 10, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP19:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP17]], double [[TMP18]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP20:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP20:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP19]], i32 323, i32 12, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP21:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP19]], double [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP22:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP21]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP23:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[TMP21]], i32 63) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP24:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP23]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP22:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP21]], i32 312, i32 15, i32 15, i1 false) +; IR-DPP-NEXT: [[TMP23:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[TMP21]], i32 63) +; IR-DPP-NEXT: [[TMP24:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP23]]) ; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0 ; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]] ; IR-DPP: 26: @@ -1825,8 +1825,8 @@ define amdgpu_ps double @global_atomic_fadd_double_uni_address_div_value_system_ ; IR-DPP-NEXT: br label [[TMP28]] ; IR-DPP: 28: ; IR-DPP-NEXT: [[TMP29:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ] -; IR-DPP-NEXT: [[TMP30:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP29]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP31:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP22]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP30:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP29]]) +; IR-DPP-NEXT: [[TMP31:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP22]]) ; IR-DPP-NEXT: [[TMP32:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP30]], double [[TMP31]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] ; IR-DPP-NEXT: [[TMP33:%.*]] = select i1 [[TMP25]], double [[TMP30]], double [[TMP32]] ; IR-DPP-NEXT: br label [[TMP34]] diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_optimizer_fp_no_rtn.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_optimizer_fp_no_rtn.ll index af38d6e27f6ff..1f53b1c1e2ef3 100644 --- a/llvm/test/CodeGen/AMDGPU/global_atomics_optimizer_fp_no_rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/global_atomics_optimizer_fp_no_rtn.ll @@ -112,18 +112,18 @@ define amdgpu_ps void @global_atomic_fadd_uni_address_div_value_scope_agent_scop define amdgpu_ps void @global_atomic_fadd_uni_address_uni_value_one_as_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, float inreg %val) #1 { ; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_uni_value_one_as_scope_unsafe_strictfp( -; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7:[0-9]+]] +; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]] ; IR-ITERATIVE: 2: -; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) ; IR-ITERATIVE-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 -; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7:[0-9]+]] ; IR-ITERATIVE-NEXT: [[TMP12:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL:%.*]], float [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0 ; IR-ITERATIVE-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]] @@ -136,18 +136,18 @@ define amdgpu_ps void @global_atomic_fadd_uni_address_uni_value_one_as_scope_uns ; IR-ITERATIVE-NEXT: ret void ; ; IR-DPP-LABEL: @global_atomic_fadd_uni_address_uni_value_one_as_scope_unsafe_strictfp( -; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8:[0-9]+]] +; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]] ; IR-DPP: 2: -; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) ; IR-DPP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 -; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8:[0-9]+]] ; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL:%.*]], float [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] ; IR-DPP-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0 ; IR-DPP-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]] @@ -165,16 +165,16 @@ define amdgpu_ps void @global_atomic_fadd_uni_address_uni_value_one_as_scope_uns define amdgpu_ps void @global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, float %val) #1 { ; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_strictfp( -; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP13:%.*]] ; IR-ITERATIVE: 2: -; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] ; IR-ITERATIVE: 10: ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP17:%.*]] syncscope("one-as") monotonic, align 4 @@ -186,9 +186,9 @@ define amdgpu_ps void @global_atomic_fadd_uni_address_div_value_one_as_scope_uns ; IR-ITERATIVE: ComputeLoop: ; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP17]], [[COMPUTELOOP]] ] ; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP20:%.*]], [[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) ; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = trunc i64 [[TMP14]] to i32 -; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP15]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP15]]) ; IR-ITERATIVE-NEXT: [[TMP17]] = call float @llvm.experimental.constrained.fadd.f32(float [[ACCUMULATOR]], float [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = shl i64 1, [[TMP14]] ; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = xor i64 [[TMP18]], -1 @@ -200,30 +200,30 @@ define amdgpu_ps void @global_atomic_fadd_uni_address_div_value_one_as_scope_uns ; IR-ITERATIVE-NEXT: br i1 [[TMP22]], label [[TMP10:%.*]], label [[TMP12]] ; ; IR-DPP-LABEL: @global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_strictfp( -; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] +; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP28:%.*]] ; IR-DPP: 2: -; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float -0.000000e+00) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-DPP-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float -0.000000e+00) +; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP9]], float [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 274, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP11]], float [[TMP12]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP13]], i32 276, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP13]], float [[TMP14]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP15]], i32 280, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP15]], float [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP17]], i32 322, i32 10, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP17]], float [[TMP18]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP19]], i32 323, i32 12, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP19]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63) +; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]]) ; IR-DPP-NEXT: [[TMP24:%.*]] = icmp eq i32 [[TMP8]], 0 ; IR-DPP-NEXT: br i1 [[TMP24]], label [[TMP25:%.*]], label [[TMP27:%.*]] ; IR-DPP: 25: @@ -240,16 +240,16 @@ define amdgpu_ps void @global_atomic_fadd_uni_address_div_value_one_as_scope_uns define amdgpu_ps void @global_atomic_fsub_uni_address_uni_value_agent_scope_strictfp(ptr addrspace(1) inreg %ptr, float inreg %val) #2 { ; IR-ITERATIVE-LABEL: @global_atomic_fsub_uni_address_uni_value_agent_scope_strictfp( -; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]] ; IR-ITERATIVE: 2: -; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) ; IR-ITERATIVE-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP12:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL:%.*]], float [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] @@ -264,16 +264,16 @@ define amdgpu_ps void @global_atomic_fsub_uni_address_uni_value_agent_scope_stri ; IR-ITERATIVE-NEXT: ret void ; ; IR-DPP-LABEL: @global_atomic_fsub_uni_address_uni_value_agent_scope_strictfp( -; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] +; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]] ; IR-DPP: 2: -; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) ; IR-DPP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 ; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] ; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL:%.*]], float [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] @@ -293,16 +293,16 @@ define amdgpu_ps void @global_atomic_fsub_uni_address_uni_value_agent_scope_stri define amdgpu_ps void @global_atomic_fsub_uni_address_div_value_agent_scope_strictfp(ptr addrspace(1) inreg %ptr, float %val) #2 { ; IR-ITERATIVE-LABEL: @global_atomic_fsub_uni_address_div_value_agent_scope_strictfp( -; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP13:%.*]] ; IR-ITERATIVE: 2: -; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] ; IR-ITERATIVE: 10: ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[TMP17:%.*]] syncscope("agent") monotonic, align 4 @@ -314,9 +314,9 @@ define amdgpu_ps void @global_atomic_fsub_uni_address_div_value_agent_scope_stri ; IR-ITERATIVE: ComputeLoop: ; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP17]], [[COMPUTELOOP]] ] ; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP20:%.*]], [[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) ; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = trunc i64 [[TMP14]] to i32 -; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP15]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP15]]) ; IR-ITERATIVE-NEXT: [[TMP17]] = call float @llvm.experimental.constrained.fadd.f32(float [[ACCUMULATOR]], float [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = shl i64 1, [[TMP14]] ; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = xor i64 [[TMP18]], -1 @@ -328,30 +328,30 @@ define amdgpu_ps void @global_atomic_fsub_uni_address_div_value_agent_scope_stri ; IR-ITERATIVE-NEXT: br i1 [[TMP22]], label [[TMP10:%.*]], label [[TMP12]] ; ; IR-DPP-LABEL: @global_atomic_fsub_uni_address_div_value_agent_scope_strictfp( -; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] +; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP28:%.*]] ; IR-DPP: 2: -; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float -0.000000e+00) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-DPP-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float -0.000000e+00) +; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP9]], float [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 274, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP11]], float [[TMP12]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP13]], i32 276, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP13]], float [[TMP14]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP15]], i32 280, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP15]], float [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP17]], i32 322, i32 10, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP17]], float [[TMP18]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP19]], i32 323, i32 12, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP19]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63) +; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]]) ; IR-DPP-NEXT: [[TMP24:%.*]] = icmp eq i32 [[TMP8]], 0 ; IR-DPP-NEXT: br i1 [[TMP24]], label [[TMP25:%.*]], label [[TMP27:%.*]] ; IR-DPP: 25: @@ -467,45 +467,25 @@ define amdgpu_ps void @global_atomic_fmin_uni_address_div_value_agent_scope_unsa } define amdgpu_ps void @global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, float inreg %val) #1{ -; IR-ITERATIVE-LABEL: @global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe_strictfp( -; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] -; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP13:%.*]] -; IR-ITERATIVE: 2: -; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 -; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 -; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 -; IR-ITERATIVE-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]] -; IR-ITERATIVE: 10: -; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4 -; IR-ITERATIVE-NEXT: br label [[TMP12]] -; IR-ITERATIVE: 12: -; IR-ITERATIVE-NEXT: br label [[TMP13]] -; IR-ITERATIVE: 13: -; IR-ITERATIVE-NEXT: ret void -; -; IR-DPP-LABEL: @global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe_strictfp( -; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] -; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP13:%.*]] -; IR-DPP: 2: -; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 -; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 -; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 -; IR-DPP-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]] -; IR-DPP: 10: -; IR-DPP-NEXT: [[TMP11:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4 -; IR-DPP-NEXT: br label [[TMP12]] -; IR-DPP: 12: -; IR-DPP-NEXT: br label [[TMP13]] -; IR-DPP: 13: -; IR-DPP-NEXT: ret void +; IR-LABEL: @global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe_strictfp( +; IR-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() +; IR-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP13:%.*]] +; IR: 2: +; IR-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) +; IR-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 +; IR-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 +; IR-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 +; IR-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 +; IR-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]] +; IR: 10: +; IR-NEXT: [[TMP11:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4 +; IR-NEXT: br label [[TMP12]] +; IR: 12: +; IR-NEXT: br label [[TMP13]] +; IR: 13: +; IR-NEXT: ret void ; %result = atomicrmw fmax ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic ret void @@ -513,16 +493,16 @@ define amdgpu_ps void @global_atomic_fmax_uni_address_uni_value_agent_scope_unsa define amdgpu_ps void @global_atomic_fmax_uni_address_div_value_agent_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, float %val) #1{ ; IR-ITERATIVE-LABEL: @global_atomic_fmax_uni_address_div_value_agent_scope_unsafe_strictfp( -; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP13:%.*]] ; IR-ITERATIVE: 2: -; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] ; IR-ITERATIVE: 10: ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[TMP17:%.*]] syncscope("agent") monotonic, align 4 @@ -534,9 +514,9 @@ define amdgpu_ps void @global_atomic_fmax_uni_address_div_value_agent_scope_unsa ; IR-ITERATIVE: ComputeLoop: ; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ 0x7FF8000000000000, [[TMP2]] ], [ [[TMP17]], [[COMPUTELOOP]] ] ; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP20:%.*]], [[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) ; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = trunc i64 [[TMP14]] to i32 -; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP15]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP15]]) ; IR-ITERATIVE-NEXT: [[TMP17]] = call float @llvm.experimental.constrained.maxnum.f32(float [[ACCUMULATOR]], float [[TMP16]], metadata !"fpexcept.strict") #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = shl i64 1, [[TMP14]] ; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = xor i64 [[TMP18]], -1 @@ -548,30 +528,30 @@ define amdgpu_ps void @global_atomic_fmax_uni_address_div_value_agent_scope_unsa ; IR-ITERATIVE-NEXT: br i1 [[TMP22]], label [[TMP10:%.*]], label [[TMP12]] ; ; IR-DPP-LABEL: @global_atomic_fmax_uni_address_div_value_agent_scope_unsafe_strictfp( -; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] +; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP28:%.*]] ; IR-DPP: 2: -; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float 0x7FF8000000000000) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-DPP-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float 0x7FF8000000000000) +; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP9]], float [[TMP10]], metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP11]], i32 274, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP11]], float [[TMP12]], metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP13]], i32 276, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP13]], float [[TMP14]], metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP15]], i32 280, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP15]], float [[TMP16]], metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP17]], i32 322, i32 10, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP17]], float [[TMP18]], metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP19]], i32 323, i32 12, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP19]], float [[TMP20]], metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63) +; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]]) ; IR-DPP-NEXT: [[TMP24:%.*]] = icmp eq i32 [[TMP8]], 0 ; IR-DPP-NEXT: br i1 [[TMP24]], label [[TMP25:%.*]], label [[TMP27:%.*]] ; IR-DPP: 25: @@ -588,16 +568,16 @@ define amdgpu_ps void @global_atomic_fmax_uni_address_div_value_agent_scope_unsa define amdgpu_ps void @global_atomic_fadd_uni_address_uni_value_system_scope_strictfp(ptr addrspace(1) inreg %ptr, float inreg %val) #2 { ; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_uni_value_system_scope_strictfp( -; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]] ; IR-ITERATIVE: 2: -; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) ; IR-ITERATIVE-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP12:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL:%.*]], float [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] @@ -612,16 +592,16 @@ define amdgpu_ps void @global_atomic_fadd_uni_address_uni_value_system_scope_str ; IR-ITERATIVE-NEXT: ret void ; ; IR-DPP-LABEL: @global_atomic_fadd_uni_address_uni_value_system_scope_strictfp( -; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] +; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]] ; IR-DPP: 2: -; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) ; IR-DPP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 ; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] ; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL:%.*]], float [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] @@ -641,16 +621,16 @@ define amdgpu_ps void @global_atomic_fadd_uni_address_uni_value_system_scope_str define amdgpu_ps void @global_atomic_fadd_uni_address_div_value_system_scope_strictfp(ptr addrspace(1) inreg %ptr, float %val) #2 { ; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_div_value_system_scope_strictfp( -; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP13:%.*]] ; IR-ITERATIVE: 2: -; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] ; IR-ITERATIVE: 10: ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP17:%.*]] monotonic, align 4 @@ -662,9 +642,9 @@ define amdgpu_ps void @global_atomic_fadd_uni_address_div_value_system_scope_str ; IR-ITERATIVE: ComputeLoop: ; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP17]], [[COMPUTELOOP]] ] ; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP20:%.*]], [[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) ; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = trunc i64 [[TMP14]] to i32 -; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP15]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP15]]) ; IR-ITERATIVE-NEXT: [[TMP17]] = call float @llvm.experimental.constrained.fadd.f32(float [[ACCUMULATOR]], float [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = shl i64 1, [[TMP14]] ; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = xor i64 [[TMP18]], -1 @@ -676,30 +656,30 @@ define amdgpu_ps void @global_atomic_fadd_uni_address_div_value_system_scope_str ; IR-ITERATIVE-NEXT: br i1 [[TMP22]], label [[TMP10:%.*]], label [[TMP12]] ; ; IR-DPP-LABEL: @global_atomic_fadd_uni_address_div_value_system_scope_strictfp( -; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] +; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP28:%.*]] ; IR-DPP: 2: -; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float -0.000000e+00) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-DPP-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float -0.000000e+00) +; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP9]], float [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 274, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP11]], float [[TMP12]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP13]], i32 276, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP13]], float [[TMP14]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP15]], i32 280, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP15]], float [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP17]], i32 322, i32 10, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP17]], float [[TMP18]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP19]], i32 323, i32 12, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP19]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63) +; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]]) ; IR-DPP-NEXT: [[TMP24:%.*]] = icmp eq i32 [[TMP8]], 0 ; IR-DPP-NEXT: br i1 [[TMP24]], label [[TMP25:%.*]], label [[TMP27:%.*]] ; IR-DPP: 25: @@ -928,16 +908,16 @@ define amdgpu_ps void @global_atomic_fadd_double_uni_address_div_value_scope_age define amdgpu_ps void @global_atomic_fadd_double_uni_address_uni_value_one_as_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, double inreg %val) #1 { ; IR-ITERATIVE-LABEL: @global_atomic_fadd_double_uni_address_uni_value_one_as_scope_unsafe_strictfp( -; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]] ; IR-ITERATIVE: 2: -; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) ; IR-ITERATIVE-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP12:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL:%.*]], double [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] @@ -952,16 +932,16 @@ define amdgpu_ps void @global_atomic_fadd_double_uni_address_uni_value_one_as_sc ; IR-ITERATIVE-NEXT: ret void ; ; IR-DPP-LABEL: @global_atomic_fadd_double_uni_address_uni_value_one_as_scope_unsafe_strictfp( -; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] +; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]] ; IR-DPP: 2: -; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) ; IR-DPP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 ; IR-DPP-NEXT: [[TMP11:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] ; IR-DPP-NEXT: [[TMP12:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL:%.*]], double [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] @@ -981,16 +961,16 @@ define amdgpu_ps void @global_atomic_fadd_double_uni_address_uni_value_one_as_sc define amdgpu_ps void @global_atomic_fadd_double_uni_address_div_value_one_as_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, double %val) #1 { ; IR-ITERATIVE-LABEL: @global_atomic_fadd_double_uni_address_div_value_one_as_scope_unsafe_strictfp( -; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP13:%.*]] ; IR-ITERATIVE: 2: -; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] ; IR-ITERATIVE: 10: ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP17:%.*]] syncscope("one-as") monotonic, align 8 @@ -1002,9 +982,9 @@ define amdgpu_ps void @global_atomic_fadd_double_uni_address_div_value_one_as_sc ; IR-ITERATIVE: ComputeLoop: ; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi double [ -0.000000e+00, [[TMP2]] ], [ [[TMP17]], [[COMPUTELOOP]] ] ; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP20:%.*]], [[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) ; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = trunc i64 [[TMP14]] to i32 -; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[VAL:%.*]], i32 [[TMP15]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[VAL:%.*]], i32 [[TMP15]]) ; IR-ITERATIVE-NEXT: [[TMP17]] = call double @llvm.experimental.constrained.fadd.f64(double [[ACCUMULATOR]], double [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = shl i64 1, [[TMP14]] ; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = xor i64 [[TMP18]], -1 @@ -1016,30 +996,30 @@ define amdgpu_ps void @global_atomic_fadd_double_uni_address_div_value_one_as_sc ; IR-ITERATIVE-NEXT: br i1 [[TMP22]], label [[TMP10:%.*]], label [[TMP12]] ; ; IR-DPP-LABEL: @global_atomic_fadd_double_uni_address_div_value_one_as_scope_unsafe_strictfp( -; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] +; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP28:%.*]] ; IR-DPP: 2: -; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP9:%.*]] = call double @llvm.amdgcn.set.inactive.f64(double [[VAL:%.*]], double -0.000000e+00) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP10:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-DPP-NEXT: [[TMP9:%.*]] = call double @llvm.amdgcn.set.inactive.f64(double [[VAL:%.*]], double -0.000000e+00) +; IR-DPP-NEXT: [[TMP10:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP9]], i32 273, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP11:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP9]], double [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP12:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP12:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP11]], i32 274, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP13:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP11]], double [[TMP12]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP13]], i32 276, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP15:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP13]], double [[TMP14]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP16:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP16:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP15]], i32 280, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP17:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP15]], double [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP17]], i32 322, i32 10, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP19:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP17]], double [[TMP18]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP20:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP20:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP19]], i32 323, i32 12, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP21:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP19]], double [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP22:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[TMP21]], i32 63) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP23:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP22]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP22:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[TMP21]], i32 63) +; IR-DPP-NEXT: [[TMP23:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP22]]) ; IR-DPP-NEXT: [[TMP24:%.*]] = icmp eq i32 [[TMP8]], 0 ; IR-DPP-NEXT: br i1 [[TMP24]], label [[TMP25:%.*]], label [[TMP27:%.*]] ; IR-DPP: 25: @@ -1056,16 +1036,16 @@ define amdgpu_ps void @global_atomic_fadd_double_uni_address_div_value_one_as_sc define amdgpu_ps void @global_atomic_fsub_double_uni_address_uni_value_agent_scope_strictfp(ptr addrspace(1) inreg %ptr, double inreg %val) #2 { ; IR-ITERATIVE-LABEL: @global_atomic_fsub_double_uni_address_uni_value_agent_scope_strictfp( -; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]] ; IR-ITERATIVE: 2: -; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) ; IR-ITERATIVE-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP12:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL:%.*]], double [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] @@ -1080,16 +1060,16 @@ define amdgpu_ps void @global_atomic_fsub_double_uni_address_uni_value_agent_sco ; IR-ITERATIVE-NEXT: ret void ; ; IR-DPP-LABEL: @global_atomic_fsub_double_uni_address_uni_value_agent_scope_strictfp( -; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] +; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]] ; IR-DPP: 2: -; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) ; IR-DPP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 ; IR-DPP-NEXT: [[TMP11:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] ; IR-DPP-NEXT: [[TMP12:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL:%.*]], double [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] @@ -1109,16 +1089,16 @@ define amdgpu_ps void @global_atomic_fsub_double_uni_address_uni_value_agent_sco define amdgpu_ps void @global_atomic_fsub_double_uni_address_div_value_agent_scope_strictfp(ptr addrspace(1) inreg %ptr, double %val) #2 { ; IR-ITERATIVE-LABEL: @global_atomic_fsub_double_uni_address_div_value_agent_scope_strictfp( -; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP13:%.*]] ; IR-ITERATIVE: 2: -; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] ; IR-ITERATIVE: 10: ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], double [[TMP17:%.*]] syncscope("agent") monotonic, align 8 @@ -1130,9 +1110,9 @@ define amdgpu_ps void @global_atomic_fsub_double_uni_address_div_value_agent_sco ; IR-ITERATIVE: ComputeLoop: ; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi double [ -0.000000e+00, [[TMP2]] ], [ [[TMP17]], [[COMPUTELOOP]] ] ; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP20:%.*]], [[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) ; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = trunc i64 [[TMP14]] to i32 -; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[VAL:%.*]], i32 [[TMP15]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[VAL:%.*]], i32 [[TMP15]]) ; IR-ITERATIVE-NEXT: [[TMP17]] = call double @llvm.experimental.constrained.fadd.f64(double [[ACCUMULATOR]], double [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = shl i64 1, [[TMP14]] ; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = xor i64 [[TMP18]], -1 @@ -1144,30 +1124,30 @@ define amdgpu_ps void @global_atomic_fsub_double_uni_address_div_value_agent_sco ; IR-ITERATIVE-NEXT: br i1 [[TMP22]], label [[TMP10:%.*]], label [[TMP12]] ; ; IR-DPP-LABEL: @global_atomic_fsub_double_uni_address_div_value_agent_scope_strictfp( -; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] +; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP28:%.*]] ; IR-DPP: 2: -; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP9:%.*]] = call double @llvm.amdgcn.set.inactive.f64(double [[VAL:%.*]], double -0.000000e+00) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP10:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-DPP-NEXT: [[TMP9:%.*]] = call double @llvm.amdgcn.set.inactive.f64(double [[VAL:%.*]], double -0.000000e+00) +; IR-DPP-NEXT: [[TMP10:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP9]], i32 273, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP11:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP9]], double [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP12:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP12:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP11]], i32 274, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP13:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP11]], double [[TMP12]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP13]], i32 276, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP15:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP13]], double [[TMP14]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP16:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP16:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP15]], i32 280, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP17:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP15]], double [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP17]], i32 322, i32 10, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP19:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP17]], double [[TMP18]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP20:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP20:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP19]], i32 323, i32 12, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP21:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP19]], double [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP22:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[TMP21]], i32 63) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP23:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP22]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP22:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[TMP21]], i32 63) +; IR-DPP-NEXT: [[TMP23:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP22]]) ; IR-DPP-NEXT: [[TMP24:%.*]] = icmp eq i32 [[TMP8]], 0 ; IR-DPP-NEXT: br i1 [[TMP24]], label [[TMP25:%.*]], label [[TMP27:%.*]] ; IR-DPP: 25: @@ -1283,45 +1263,25 @@ define amdgpu_ps void @global_atomic_fmin_double_uni_address_div_value_agent_sco } define amdgpu_ps void @global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, double inreg %val) #1{ -; IR-ITERATIVE-LABEL: @global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe_strictfp( -; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] -; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP13:%.*]] -; IR-ITERATIVE: 2: -; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 -; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 -; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 -; IR-ITERATIVE-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]] -; IR-ITERATIVE: 10: -; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("agent") monotonic, align 8 -; IR-ITERATIVE-NEXT: br label [[TMP12]] -; IR-ITERATIVE: 12: -; IR-ITERATIVE-NEXT: br label [[TMP13]] -; IR-ITERATIVE: 13: -; IR-ITERATIVE-NEXT: ret void -; -; IR-DPP-LABEL: @global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe_strictfp( -; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] -; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP13:%.*]] -; IR-DPP: 2: -; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 -; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 -; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 -; IR-DPP-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]] -; IR-DPP: 10: -; IR-DPP-NEXT: [[TMP11:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("agent") monotonic, align 8 -; IR-DPP-NEXT: br label [[TMP12]] -; IR-DPP: 12: -; IR-DPP-NEXT: br label [[TMP13]] -; IR-DPP: 13: -; IR-DPP-NEXT: ret void +; IR-LABEL: @global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe_strictfp( +; IR-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() +; IR-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP13:%.*]] +; IR: 2: +; IR-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) +; IR-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 +; IR-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 +; IR-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 +; IR-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 +; IR-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]] +; IR: 10: +; IR-NEXT: [[TMP11:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("agent") monotonic, align 8 +; IR-NEXT: br label [[TMP12]] +; IR: 12: +; IR-NEXT: br label [[TMP13]] +; IR: 13: +; IR-NEXT: ret void ; %result = atomicrmw fmax ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic ret void @@ -1329,16 +1289,16 @@ define amdgpu_ps void @global_atomic_fmax_double_uni_address_uni_value_agent_sco define amdgpu_ps void @global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, double %val) #1{ ; IR-ITERATIVE-LABEL: @global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe_strictfp( -; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP13:%.*]] ; IR-ITERATIVE: 2: -; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] ; IR-ITERATIVE: 10: ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], double [[TMP17:%.*]] syncscope("agent") monotonic, align 8 @@ -1350,9 +1310,9 @@ define amdgpu_ps void @global_atomic_fmax_double_uni_address_div_value_agent_sco ; IR-ITERATIVE: ComputeLoop: ; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi double [ 0x7FF8000000000000, [[TMP2]] ], [ [[TMP17]], [[COMPUTELOOP]] ] ; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP20:%.*]], [[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) ; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = trunc i64 [[TMP14]] to i32 -; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[VAL:%.*]], i32 [[TMP15]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[VAL:%.*]], i32 [[TMP15]]) ; IR-ITERATIVE-NEXT: [[TMP17]] = call double @llvm.experimental.constrained.maxnum.f64(double [[ACCUMULATOR]], double [[TMP16]], metadata !"fpexcept.strict") #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = shl i64 1, [[TMP14]] ; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = xor i64 [[TMP18]], -1 @@ -1364,30 +1324,30 @@ define amdgpu_ps void @global_atomic_fmax_double_uni_address_div_value_agent_sco ; IR-ITERATIVE-NEXT: br i1 [[TMP22]], label [[TMP10:%.*]], label [[TMP12]] ; ; IR-DPP-LABEL: @global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe_strictfp( -; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] +; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP28:%.*]] ; IR-DPP: 2: -; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP9:%.*]] = call double @llvm.amdgcn.set.inactive.f64(double [[VAL:%.*]], double 0x7FF8000000000000) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP10:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-DPP-NEXT: [[TMP9:%.*]] = call double @llvm.amdgcn.set.inactive.f64(double [[VAL:%.*]], double 0x7FF8000000000000) +; IR-DPP-NEXT: [[TMP10:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP9]], i32 273, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP11:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP9]], double [[TMP10]], metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP12:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP12:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP11]], i32 274, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP13:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP11]], double [[TMP12]], metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP13]], i32 276, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP15:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP13]], double [[TMP14]], metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP16:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP16:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP15]], i32 280, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP17:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP15]], double [[TMP16]], metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP17]], i32 322, i32 10, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP19:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP17]], double [[TMP18]], metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP20:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP20:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP19]], i32 323, i32 12, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP21:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP19]], double [[TMP20]], metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP22:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[TMP21]], i32 63) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP23:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP22]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP22:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[TMP21]], i32 63) +; IR-DPP-NEXT: [[TMP23:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP22]]) ; IR-DPP-NEXT: [[TMP24:%.*]] = icmp eq i32 [[TMP8]], 0 ; IR-DPP-NEXT: br i1 [[TMP24]], label [[TMP25:%.*]], label [[TMP27:%.*]] ; IR-DPP: 25: @@ -1404,16 +1364,16 @@ define amdgpu_ps void @global_atomic_fmax_double_uni_address_div_value_agent_sco define amdgpu_ps void @global_atomic_fadd_double_uni_address_uni_value_system_scope_strictfp(ptr addrspace(1) inreg %ptr, double inreg %val) #2 { ; IR-ITERATIVE-LABEL: @global_atomic_fadd_double_uni_address_uni_value_system_scope_strictfp( -; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]] ; IR-ITERATIVE: 2: -; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) ; IR-ITERATIVE-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP12:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL:%.*]], double [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] @@ -1428,16 +1388,16 @@ define amdgpu_ps void @global_atomic_fadd_double_uni_address_uni_value_system_sc ; IR-ITERATIVE-NEXT: ret void ; ; IR-DPP-LABEL: @global_atomic_fadd_double_uni_address_uni_value_system_scope_strictfp( -; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] +; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]] ; IR-DPP: 2: -; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) ; IR-DPP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32 ; IR-DPP-NEXT: [[TMP11:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] ; IR-DPP-NEXT: [[TMP12:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL:%.*]], double [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] @@ -1457,16 +1417,16 @@ define amdgpu_ps void @global_atomic_fadd_double_uni_address_uni_value_system_sc define amdgpu_ps void @global_atomic_fadd_double_uni_address_div_value_system_scope_strictfp(ptr addrspace(1) inreg %ptr, double %val) #2 { ; IR-ITERATIVE-LABEL: @global_atomic_fadd_double_uni_address_div_value_system_scope_strictfp( -; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP13:%.*]] ; IR-ITERATIVE: 2: -; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]] -; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] ; IR-ITERATIVE: 10: ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP17:%.*]] monotonic, align 4 @@ -1478,9 +1438,9 @@ define amdgpu_ps void @global_atomic_fadd_double_uni_address_div_value_system_sc ; IR-ITERATIVE: ComputeLoop: ; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi double [ -0.000000e+00, [[TMP2]] ], [ [[TMP17]], [[COMPUTELOOP]] ] ; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP20:%.*]], [[COMPUTELOOP]] ] -; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) ; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = trunc i64 [[TMP14]] to i32 -; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[VAL:%.*]], i32 [[TMP15]]) #[[ATTR7]] +; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[VAL:%.*]], i32 [[TMP15]]) ; IR-ITERATIVE-NEXT: [[TMP17]] = call double @llvm.experimental.constrained.fadd.f64(double [[ACCUMULATOR]], double [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]] ; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = shl i64 1, [[TMP14]] ; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = xor i64 [[TMP18]], -1 @@ -1492,30 +1452,30 @@ define amdgpu_ps void @global_atomic_fadd_double_uni_address_div_value_system_sc ; IR-ITERATIVE-NEXT: br i1 [[TMP22]], label [[TMP10:%.*]], label [[TMP12]] ; ; IR-DPP-LABEL: @global_atomic_fadd_double_uni_address_div_value_system_scope_strictfp( -; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]] +; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP28:%.*]] ; IR-DPP: 2: -; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP9:%.*]] = call double @llvm.amdgcn.set.inactive.f64(double [[VAL:%.*]], double -0.000000e+00) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP10:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) +; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) +; IR-DPP-NEXT: [[TMP9:%.*]] = call double @llvm.amdgcn.set.inactive.f64(double [[VAL:%.*]], double -0.000000e+00) +; IR-DPP-NEXT: [[TMP10:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP9]], i32 273, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP11:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP9]], double [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP12:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP12:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP11]], i32 274, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP13:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP11]], double [[TMP12]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP13]], i32 276, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP15:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP13]], double [[TMP14]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP16:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP16:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP15]], i32 280, i32 15, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP17:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP15]], double [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP17]], i32 322, i32 10, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP19:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP17]], double [[TMP18]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP20:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP20:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP19]], i32 323, i32 12, i32 15, i1 false) ; IR-DPP-NEXT: [[TMP21:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP19]], double [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] -; IR-DPP-NEXT: [[TMP22:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[TMP21]], i32 63) #[[ATTR8]] -; IR-DPP-NEXT: [[TMP23:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP22]]) #[[ATTR8]] +; IR-DPP-NEXT: [[TMP22:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[TMP21]], i32 63) +; IR-DPP-NEXT: [[TMP23:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP22]]) ; IR-DPP-NEXT: [[TMP24:%.*]] = icmp eq i32 [[TMP8]], 0 ; IR-DPP-NEXT: br i1 [[TMP24]], label [[TMP25:%.*]], label [[TMP27:%.*]] ; IR-DPP: 25: diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd.ll index 84d9a64efa0f7..4b24ce6f88d8a 100644 --- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd.ll +++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd.ll @@ -1848,7 +1848,7 @@ define bfloat @test_atomicrmw_fadd_bf16_global_system_align4(ptr addrspace(1) %p define bfloat @test_atomicrmw_fadd_bf16_local_strictfp(ptr addrspace(3) %ptr, bfloat %value) #2 { ; ALL-LABEL: @test_atomicrmw_fadd_bf16_local_strictfp( -; ALL-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[PTR:%.*]], i32 -4) #[[ATTR8:[0-9]+]] +; ALL-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[PTR:%.*]], i32 -4) ; ALL-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(3) [[PTR]] to i32 ; ALL-NEXT: [[PTRLSB:%.*]] = and i32 [[TMP1]], 3 ; ALL-NEXT: [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3 @@ -1861,7 +1861,7 @@ define bfloat @test_atomicrmw_fadd_bf16_local_strictfp(ptr addrspace(3) %ptr, bf ; ALL-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[TMP2]] ; ALL-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 ; ALL-NEXT: [[TMP4:%.*]] = bitcast i16 [[EXTRACTED]] to bfloat -; ALL-NEXT: [[NEW:%.*]] = call bfloat @llvm.experimental.constrained.fadd.bf16(bfloat [[TMP4]], bfloat [[VALUE:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]] +; ALL-NEXT: [[NEW:%.*]] = call bfloat @llvm.experimental.constrained.fadd.bf16(bfloat [[TMP4]], bfloat [[VALUE:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8:[0-9]+]] ; ALL-NEXT: [[TMP5:%.*]] = bitcast bfloat [[NEW]] to i16 ; ALL-NEXT: [[EXTENDED:%.*]] = zext i16 [[TMP5]] to i32 ; ALL-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[TMP2]] diff --git a/llvm/test/Transforms/HardwareLoops/scalar-while-strictfp.ll b/llvm/test/Transforms/HardwareLoops/scalar-while-strictfp.ll index 951aacc065362..c52197fd677a9 100644 --- a/llvm/test/Transforms/HardwareLoops/scalar-while-strictfp.ll +++ b/llvm/test/Transforms/HardwareLoops/scalar-while-strictfp.ll @@ -10,14 +10,14 @@ define void @while_lt(i32 %i, i32 %N, ptr nocapture %A) strictfp { ; CHECK-DEC-NEXT: br i1 [[CMP4]], label [[WHILE_BODY_PREHEADER:%.*]], label [[WHILE_END:%.*]] ; CHECK-DEC: while.body.preheader: ; CHECK-DEC-NEXT: [[TMP0:%.*]] = sub i32 [[N]], [[I]] -; CHECK-DEC-NEXT: call void @llvm.set.loop.iterations.i32(i32 [[TMP0]]) #[[ATTR0:[0-9]+]] +; CHECK-DEC-NEXT: call void @llvm.set.loop.iterations.i32(i32 [[TMP0]]) ; CHECK-DEC-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK-DEC: while.body: ; CHECK-DEC-NEXT: [[I_ADDR_05:%.*]] = phi i32 [ [[INC:%.*]], [[WHILE_BODY]] ], [ [[I]], [[WHILE_BODY_PREHEADER]] ] ; CHECK-DEC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[I_ADDR_05]] ; CHECK-DEC-NEXT: store i32 [[I_ADDR_05]], ptr [[ARRAYIDX]], align 4 ; CHECK-DEC-NEXT: [[INC]] = add nuw i32 [[I_ADDR_05]], 1 -; CHECK-DEC-NEXT: [[TMP1:%.*]] = call i1 @llvm.loop.decrement.i32(i32 1) #[[ATTR0]] +; CHECK-DEC-NEXT: [[TMP1:%.*]] = call i1 @llvm.loop.decrement.i32(i32 1) ; CHECK-DEC-NEXT: br i1 [[TMP1]], label [[WHILE_BODY]], label [[WHILE_END]] ; CHECK-DEC: while.end: ; CHECK-DEC-NEXT: ret void @@ -29,7 +29,7 @@ define void @while_lt(i32 %i, i32 %N, ptr nocapture %A) strictfp { ; CHECK-PHI-NEXT: br i1 [[CMP4]], label [[WHILE_BODY_PREHEADER:%.*]], label [[WHILE_END:%.*]] ; CHECK-PHI: while.body.preheader: ; CHECK-PHI-NEXT: [[TMP0:%.*]] = sub i32 [[N]], [[I]] -; CHECK-PHI-NEXT: [[TMP1:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[TMP0]]) #[[ATTR0:[0-9]+]] +; CHECK-PHI-NEXT: [[TMP1:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[TMP0]]) ; CHECK-PHI-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK-PHI: while.body: ; CHECK-PHI-NEXT: [[I_ADDR_05:%.*]] = phi i32 [ [[INC:%.*]], [[WHILE_BODY]] ], [ [[I]], [[WHILE_BODY_PREHEADER]] ] @@ -37,7 +37,7 @@ define void @while_lt(i32 %i, i32 %N, ptr nocapture %A) strictfp { ; CHECK-PHI-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[I_ADDR_05]] ; CHECK-PHI-NEXT: store i32 [[I_ADDR_05]], ptr [[ARRAYIDX]], align 4 ; CHECK-PHI-NEXT: [[INC]] = add nuw i32 [[I_ADDR_05]], 1 -; CHECK-PHI-NEXT: [[TMP3]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP2]], i32 1) #[[ATTR0]] +; CHECK-PHI-NEXT: [[TMP3]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP2]], i32 1) ; CHECK-PHI-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 ; CHECK-PHI-NEXT: br i1 [[TMP4]], label [[WHILE_BODY]], label [[WHILE_END]] ; CHECK-PHI: while.end: @@ -67,14 +67,14 @@ define void @while_gt(i32 %i, i32 %N, ptr nocapture %A) strictfp { ; CHECK-DEC-NEXT: br i1 [[CMP4]], label [[WHILE_BODY_PREHEADER:%.*]], label [[WHILE_END:%.*]] ; CHECK-DEC: while.body.preheader: ; CHECK-DEC-NEXT: [[TMP0:%.*]] = sub i32 [[I]], [[N]] -; CHECK-DEC-NEXT: call void @llvm.set.loop.iterations.i32(i32 [[TMP0]]) #[[ATTR0]] +; CHECK-DEC-NEXT: call void @llvm.set.loop.iterations.i32(i32 [[TMP0]]) ; CHECK-DEC-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK-DEC: while.body: ; CHECK-DEC-NEXT: [[I_ADDR_05:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[I]], [[WHILE_BODY_PREHEADER]] ] ; CHECK-DEC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[I_ADDR_05]] ; CHECK-DEC-NEXT: store i32 [[I_ADDR_05]], ptr [[ARRAYIDX]], align 4 ; CHECK-DEC-NEXT: [[DEC]] = add nsw i32 [[I_ADDR_05]], -1 -; CHECK-DEC-NEXT: [[TMP1:%.*]] = call i1 @llvm.loop.decrement.i32(i32 1) #[[ATTR0]] +; CHECK-DEC-NEXT: [[TMP1:%.*]] = call i1 @llvm.loop.decrement.i32(i32 1) ; CHECK-DEC-NEXT: br i1 [[TMP1]], label [[WHILE_BODY]], label [[WHILE_END]] ; CHECK-DEC: while.end: ; CHECK-DEC-NEXT: ret void @@ -86,7 +86,7 @@ define void @while_gt(i32 %i, i32 %N, ptr nocapture %A) strictfp { ; CHECK-PHI-NEXT: br i1 [[CMP4]], label [[WHILE_BODY_PREHEADER:%.*]], label [[WHILE_END:%.*]] ; CHECK-PHI: while.body.preheader: ; CHECK-PHI-NEXT: [[TMP0:%.*]] = sub i32 [[I]], [[N]] -; CHECK-PHI-NEXT: [[TMP1:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[TMP0]]) #[[ATTR0]] +; CHECK-PHI-NEXT: [[TMP1:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[TMP0]]) ; CHECK-PHI-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK-PHI: while.body: ; CHECK-PHI-NEXT: [[I_ADDR_05:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[I]], [[WHILE_BODY_PREHEADER]] ] @@ -94,7 +94,7 @@ define void @while_gt(i32 %i, i32 %N, ptr nocapture %A) strictfp { ; CHECK-PHI-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[I_ADDR_05]] ; CHECK-PHI-NEXT: store i32 [[I_ADDR_05]], ptr [[ARRAYIDX]], align 4 ; CHECK-PHI-NEXT: [[DEC]] = add nsw i32 [[I_ADDR_05]], -1 -; CHECK-PHI-NEXT: [[TMP3]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP2]], i32 1) #[[ATTR0]] +; CHECK-PHI-NEXT: [[TMP3]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP2]], i32 1) ; CHECK-PHI-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 ; CHECK-PHI-NEXT: br i1 [[TMP4]], label [[WHILE_BODY]], label [[WHILE_END]] ; CHECK-PHI: while.end: @@ -125,14 +125,14 @@ define void @while_gte(i32 %i, i32 %N, ptr nocapture %A) strictfp { ; CHECK-DEC: while.body.preheader: ; CHECK-DEC-NEXT: [[TMP0:%.*]] = add i32 [[I]], 1 ; CHECK-DEC-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[N]] -; CHECK-DEC-NEXT: call void @llvm.set.loop.iterations.i32(i32 [[TMP1]]) #[[ATTR0]] +; CHECK-DEC-NEXT: call void @llvm.set.loop.iterations.i32(i32 [[TMP1]]) ; CHECK-DEC-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK-DEC: while.body: ; CHECK-DEC-NEXT: [[I_ADDR_05:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[I]], [[WHILE_BODY_PREHEADER]] ] ; CHECK-DEC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[I_ADDR_05]] ; CHECK-DEC-NEXT: store i32 [[I_ADDR_05]], ptr [[ARRAYIDX]], align 4 ; CHECK-DEC-NEXT: [[DEC]] = add nsw i32 [[I_ADDR_05]], -1 -; CHECK-DEC-NEXT: [[TMP2:%.*]] = call i1 @llvm.loop.decrement.i32(i32 1) #[[ATTR0]] +; CHECK-DEC-NEXT: [[TMP2:%.*]] = call i1 @llvm.loop.decrement.i32(i32 1) ; CHECK-DEC-NEXT: br i1 [[TMP2]], label [[WHILE_BODY]], label [[WHILE_END]] ; CHECK-DEC: while.end: ; CHECK-DEC-NEXT: ret void @@ -145,7 +145,7 @@ define void @while_gte(i32 %i, i32 %N, ptr nocapture %A) strictfp { ; CHECK-PHI: while.body.preheader: ; CHECK-PHI-NEXT: [[TMP0:%.*]] = add i32 [[I]], 1 ; CHECK-PHI-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[N]] -; CHECK-PHI-NEXT: [[TMP2:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[TMP1]]) #[[ATTR0]] +; CHECK-PHI-NEXT: [[TMP2:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[TMP1]]) ; CHECK-PHI-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK-PHI: while.body: ; CHECK-PHI-NEXT: [[I_ADDR_05:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[I]], [[WHILE_BODY_PREHEADER]] ] @@ -153,7 +153,7 @@ define void @while_gte(i32 %i, i32 %N, ptr nocapture %A) strictfp { ; CHECK-PHI-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[I_ADDR_05]] ; CHECK-PHI-NEXT: store i32 [[I_ADDR_05]], ptr [[ARRAYIDX]], align 4 ; CHECK-PHI-NEXT: [[DEC]] = add nsw i32 [[I_ADDR_05]], -1 -; CHECK-PHI-NEXT: [[TMP4]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP3]], i32 1) #[[ATTR0]] +; CHECK-PHI-NEXT: [[TMP4]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP3]], i32 1) ; CHECK-PHI-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 ; CHECK-PHI-NEXT: br i1 [[TMP5]], label [[WHILE_BODY]], label [[WHILE_END]] ; CHECK-PHI: while.end: @@ -182,14 +182,14 @@ define void @while_ne(i32 %N, ptr nocapture %A) strictfp { ; CHECK-DEC-NEXT: [[CMP:%.*]] = icmp ne i32 [[N:%.*]], 0 ; CHECK-DEC-NEXT: br i1 [[CMP]], label [[WHILE_BODY_PREHEADER:%.*]], label [[WHILE_END:%.*]] ; CHECK-DEC: while.body.preheader: -; CHECK-DEC-NEXT: call void @llvm.set.loop.iterations.i32(i32 [[N]]) #[[ATTR0]] +; CHECK-DEC-NEXT: call void @llvm.set.loop.iterations.i32(i32 [[N]]) ; CHECK-DEC-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK-DEC: while.body: ; CHECK-DEC-NEXT: [[I_ADDR_05:%.*]] = phi i32 [ [[INC:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ] ; CHECK-DEC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[I_ADDR_05]] ; CHECK-DEC-NEXT: store i32 [[I_ADDR_05]], ptr [[ARRAYIDX]], align 4 ; CHECK-DEC-NEXT: [[INC]] = add nuw i32 [[I_ADDR_05]], 1 -; CHECK-DEC-NEXT: [[TMP0:%.*]] = call i1 @llvm.loop.decrement.i32(i32 1) #[[ATTR0]] +; CHECK-DEC-NEXT: [[TMP0:%.*]] = call i1 @llvm.loop.decrement.i32(i32 1) ; CHECK-DEC-NEXT: br i1 [[TMP0]], label [[WHILE_BODY]], label [[WHILE_END]] ; CHECK-DEC: while.end: ; CHECK-DEC-NEXT: ret void @@ -200,7 +200,7 @@ define void @while_ne(i32 %N, ptr nocapture %A) strictfp { ; CHECK-PHI-NEXT: [[CMP:%.*]] = icmp ne i32 [[N:%.*]], 0 ; CHECK-PHI-NEXT: br i1 [[CMP]], label [[WHILE_BODY_PREHEADER:%.*]], label [[WHILE_END:%.*]] ; CHECK-PHI: while.body.preheader: -; CHECK-PHI-NEXT: [[TMP0:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[N]]) #[[ATTR0]] +; CHECK-PHI-NEXT: [[TMP0:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[N]]) ; CHECK-PHI-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK-PHI: while.body: ; CHECK-PHI-NEXT: [[I_ADDR_05:%.*]] = phi i32 [ [[INC:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ] @@ -208,7 +208,7 @@ define void @while_ne(i32 %N, ptr nocapture %A) strictfp { ; CHECK-PHI-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[I_ADDR_05]] ; CHECK-PHI-NEXT: store i32 [[I_ADDR_05]], ptr [[ARRAYIDX]], align 4 ; CHECK-PHI-NEXT: [[INC]] = add nuw i32 [[I_ADDR_05]], 1 -; CHECK-PHI-NEXT: [[TMP2]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP1]], i32 1) #[[ATTR0]] +; CHECK-PHI-NEXT: [[TMP2]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP1]], i32 1) ; CHECK-PHI-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 ; CHECK-PHI-NEXT: br i1 [[TMP3]], label [[WHILE_BODY]], label [[WHILE_END]] ; CHECK-PHI: while.end: @@ -237,14 +237,14 @@ define void @while_eq(i32 %N, ptr nocapture %A) strictfp { ; CHECK-DEC-NEXT: [[CMP:%.*]] = icmp eq i32 [[N:%.*]], 0 ; CHECK-DEC-NEXT: br i1 [[CMP]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]] ; CHECK-DEC: while.body.preheader: -; CHECK-DEC-NEXT: call void @llvm.set.loop.iterations.i32(i32 [[N]]) #[[ATTR0]] +; CHECK-DEC-NEXT: call void @llvm.set.loop.iterations.i32(i32 [[N]]) ; CHECK-DEC-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK-DEC: while.body: ; CHECK-DEC-NEXT: [[I_ADDR_05:%.*]] = phi i32 [ [[INC:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ] ; CHECK-DEC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[I_ADDR_05]] ; CHECK-DEC-NEXT: store i32 [[I_ADDR_05]], ptr [[ARRAYIDX]], align 4 ; CHECK-DEC-NEXT: [[INC]] = add nuw i32 [[I_ADDR_05]], 1 -; CHECK-DEC-NEXT: [[TMP0:%.*]] = call i1 @llvm.loop.decrement.i32(i32 1) #[[ATTR0]] +; CHECK-DEC-NEXT: [[TMP0:%.*]] = call i1 @llvm.loop.decrement.i32(i32 1) ; CHECK-DEC-NEXT: br i1 [[TMP0]], label [[WHILE_BODY]], label [[WHILE_END]] ; CHECK-DEC: while.end: ; CHECK-DEC-NEXT: ret void @@ -255,7 +255,7 @@ define void @while_eq(i32 %N, ptr nocapture %A) strictfp { ; CHECK-PHI-NEXT: [[CMP:%.*]] = icmp eq i32 [[N:%.*]], 0 ; CHECK-PHI-NEXT: br i1 [[CMP]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]] ; CHECK-PHI: while.body.preheader: -; CHECK-PHI-NEXT: [[TMP0:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[N]]) #[[ATTR0]] +; CHECK-PHI-NEXT: [[TMP0:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[N]]) ; CHECK-PHI-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK-PHI: while.body: ; CHECK-PHI-NEXT: [[I_ADDR_05:%.*]] = phi i32 [ [[INC:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ] @@ -263,7 +263,7 @@ define void @while_eq(i32 %N, ptr nocapture %A) strictfp { ; CHECK-PHI-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[I_ADDR_05]] ; CHECK-PHI-NEXT: store i32 [[I_ADDR_05]], ptr [[ARRAYIDX]], align 4 ; CHECK-PHI-NEXT: [[INC]] = add nuw i32 [[I_ADDR_05]], 1 -; CHECK-PHI-NEXT: [[TMP2]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP1]], i32 1) #[[ATTR0]] +; CHECK-PHI-NEXT: [[TMP2]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP1]], i32 1) ; CHECK-PHI-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 ; CHECK-PHI-NEXT: br i1 [[TMP3]], label [[WHILE_BODY]], label [[WHILE_END]] ; CHECK-PHI: while.end: @@ -294,14 +294,14 @@ define void @while_preheader_eq(i32 %N, ptr nocapture %A) strictfp { ; CHECK-DEC-NEXT: [[CMP:%.*]] = icmp eq i32 [[N:%.*]], 0 ; CHECK-DEC-NEXT: br i1 [[CMP]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]] ; CHECK-DEC: while.body.preheader: -; CHECK-DEC-NEXT: call void @llvm.set.loop.iterations.i32(i32 [[N]]) #[[ATTR0]] +; CHECK-DEC-NEXT: call void @llvm.set.loop.iterations.i32(i32 [[N]]) ; CHECK-DEC-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK-DEC: while.body: ; CHECK-DEC-NEXT: [[I_ADDR_05:%.*]] = phi i32 [ [[INC:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ] ; CHECK-DEC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[I_ADDR_05]] ; CHECK-DEC-NEXT: store i32 [[I_ADDR_05]], ptr [[ARRAYIDX]], align 4 ; CHECK-DEC-NEXT: [[INC]] = add nuw i32 [[I_ADDR_05]], 1 -; CHECK-DEC-NEXT: [[TMP0:%.*]] = call i1 @llvm.loop.decrement.i32(i32 1) #[[ATTR0]] +; CHECK-DEC-NEXT: [[TMP0:%.*]] = call i1 @llvm.loop.decrement.i32(i32 1) ; CHECK-DEC-NEXT: br i1 [[TMP0]], label [[WHILE_BODY]], label [[WHILE_END]] ; CHECK-DEC: while.end: ; CHECK-DEC-NEXT: ret void @@ -314,7 +314,7 @@ define void @while_preheader_eq(i32 %N, ptr nocapture %A) strictfp { ; CHECK-PHI-NEXT: [[CMP:%.*]] = icmp eq i32 [[N:%.*]], 0 ; CHECK-PHI-NEXT: br i1 [[CMP]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]] ; CHECK-PHI: while.body.preheader: -; CHECK-PHI-NEXT: [[TMP0:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[N]]) #[[ATTR0]] +; CHECK-PHI-NEXT: [[TMP0:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[N]]) ; CHECK-PHI-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK-PHI: while.body: ; CHECK-PHI-NEXT: [[I_ADDR_05:%.*]] = phi i32 [ [[INC:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ] @@ -322,7 +322,7 @@ define void @while_preheader_eq(i32 %N, ptr nocapture %A) strictfp { ; CHECK-PHI-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[I_ADDR_05]] ; CHECK-PHI-NEXT: store i32 [[I_ADDR_05]], ptr [[ARRAYIDX]], align 4 ; CHECK-PHI-NEXT: [[INC]] = add nuw i32 [[I_ADDR_05]], 1 -; CHECK-PHI-NEXT: [[TMP2]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP1]], i32 1) #[[ATTR0]] +; CHECK-PHI-NEXT: [[TMP2]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP1]], i32 1) ; CHECK-PHI-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 ; CHECK-PHI-NEXT: br i1 [[TMP3]], label [[WHILE_BODY]], label [[WHILE_END]] ; CHECK-PHI: while.end: @@ -356,7 +356,7 @@ define void @nested(ptr nocapture %A, i32 %N) strictfp { ; CHECK-DEC: while.cond1.preheader.us: ; CHECK-DEC-NEXT: [[I_021_US:%.*]] = phi i32 [ [[INC6_US:%.*]], [[WHILE_COND1_WHILE_END_CRIT_EDGE_US:%.*]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-DEC-NEXT: [[MUL_US:%.*]] = mul i32 [[I_021_US]], [[N]] -; CHECK-DEC-NEXT: call void @llvm.set.loop.iterations.i32(i32 [[N]]) #[[ATTR0]] +; CHECK-DEC-NEXT: call void @llvm.set.loop.iterations.i32(i32 [[N]]) ; CHECK-DEC-NEXT: br label [[WHILE_BODY3_US:%.*]] ; CHECK-DEC: while.body3.us: ; CHECK-DEC-NEXT: [[J_019_US:%.*]] = phi i32 [ 0, [[WHILE_COND1_PREHEADER_US]] ], [ [[INC_US:%.*]], [[WHILE_BODY3_US]] ] @@ -364,7 +364,7 @@ define void @nested(ptr nocapture %A, i32 %N) strictfp { ; CHECK-DEC-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[ADD_US]] ; CHECK-DEC-NEXT: store i32 [[ADD_US]], ptr [[ARRAYIDX_US]], align 4 ; CHECK-DEC-NEXT: [[INC_US]] = add nuw i32 [[J_019_US]], 1 -; CHECK-DEC-NEXT: [[TMP0:%.*]] = call i1 @llvm.loop.decrement.i32(i32 1) #[[ATTR0]] +; CHECK-DEC-NEXT: [[TMP0:%.*]] = call i1 @llvm.loop.decrement.i32(i32 1) ; CHECK-DEC-NEXT: br i1 [[TMP0]], label [[WHILE_BODY3_US]], label [[WHILE_COND1_WHILE_END_CRIT_EDGE_US]] ; CHECK-DEC: while.cond1.while.end_crit_edge.us: ; CHECK-DEC-NEXT: [[INC6_US]] = add nuw i32 [[I_021_US]], 1 @@ -381,7 +381,7 @@ define void @nested(ptr nocapture %A, i32 %N) strictfp { ; CHECK-PHI: while.cond1.preheader.us: ; CHECK-PHI-NEXT: [[I_021_US:%.*]] = phi i32 [ [[INC6_US:%.*]], [[WHILE_COND1_WHILE_END_CRIT_EDGE_US:%.*]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-PHI-NEXT: [[MUL_US:%.*]] = mul i32 [[I_021_US]], [[N]] -; CHECK-PHI-NEXT: [[TMP0:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[N]]) #[[ATTR0]] +; CHECK-PHI-NEXT: [[TMP0:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[N]]) ; CHECK-PHI-NEXT: br label [[WHILE_BODY3_US:%.*]] ; CHECK-PHI: while.body3.us: ; CHECK-PHI-NEXT: [[J_019_US:%.*]] = phi i32 [ 0, [[WHILE_COND1_PREHEADER_US]] ], [ [[INC_US:%.*]], [[WHILE_BODY3_US]] ] @@ -390,7 +390,7 @@ define void @nested(ptr nocapture %A, i32 %N) strictfp { ; CHECK-PHI-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[ADD_US]] ; CHECK-PHI-NEXT: store i32 [[ADD_US]], ptr [[ARRAYIDX_US]], align 4 ; CHECK-PHI-NEXT: [[INC_US]] = add nuw i32 [[J_019_US]], 1 -; CHECK-PHI-NEXT: [[TMP2]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP1]], i32 1) #[[ATTR0]] +; CHECK-PHI-NEXT: [[TMP2]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP1]], i32 1) ; CHECK-PHI-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 ; CHECK-PHI-NEXT: br i1 [[TMP3]], label [[WHILE_BODY3_US]], label [[WHILE_COND1_WHILE_END_CRIT_EDGE_US]] ; CHECK-PHI: while.cond1.while.end_crit_edge.us: