-
Notifications
You must be signed in to change notification settings - Fork 13.4k
[CodeGen] Add TBAA struct path info for array members #137719
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
Avoid unintentional matches against extra load/stores in the unoptimized LLVM IR.
This enables the LLVM optimizer to view accesses to distinct struct members as independent, also for array members. For example, the following two stores no longer alias: struct S { int a[10]; int b; }; void test(S *p, int i) { p->a[i] = ...; p->b = ...; } Array members were already added to TBAA struct type nodes in commit 57493e2. Here, we extend a path tag for an array subscript expression.
@llvm/pr-subscribers-clang-codegen Author: Bruno De Fraine (brunodf-snps) ChangesThis enables the LLVM optimizer to view accesses to distinct struct
Array members were already added to TBAA struct type nodes in commit Full diff: https://github.com/llvm/llvm-project/pull/137719.diff 2 Files Affected:
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index bba7d1e805f3f..c95a54fcebba9 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -4503,7 +4503,32 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
E->getType(), !getLangOpts().PointerOverflowDefined, SignedIndices,
E->getExprLoc(), &arrayType, E->getBase());
EltBaseInfo = ArrayLV.getBaseInfo();
- EltTBAAInfo = CGM.getTBAAInfoForSubobject(ArrayLV, E->getType());
+ if (!CGM.getCodeGenOpts().NewStructPathTBAA) {
+ // Since CodeGenTBAA::getTypeInfoHelper only handles array types for
+ // new struct path TBAA, we must a use a plain access.
+ EltTBAAInfo = CGM.getTBAAInfoForSubobject(ArrayLV, E->getType());
+ } else if (ArrayLV.getTBAAInfo().isMayAlias()) {
+ EltTBAAInfo = TBAAAccessInfo::getMayAliasInfo();
+ } else if (ArrayLV.getTBAAInfo().isIncomplete()) {
+ EltTBAAInfo = CGM.getTBAAAccessInfo(E->getType());
+ } else {
+ // Extend struct path from base lvalue, similar to EmitLValueForField.
+ // If no base type has been assigned for the array access, then try to
+ // generate one.
+ EltTBAAInfo = ArrayLV.getTBAAInfo();
+ if (!EltTBAAInfo.BaseType) {
+ EltTBAAInfo.BaseType = CGM.getTBAABaseTypeInfo(ArrayLV.getType());
+ assert(!EltTBAAInfo.Offset &&
+ "Nonzero offset for an access with no base type!");
+ }
+ // The index into the array is a runtime value. We use the same struct
+ // path for all array elements (that of the element at index 0). So we
+ // set the access type and size, but do not have to adjust
+ // EltTBAAInfo.Offset.
+ EltTBAAInfo.AccessType = CGM.getTBAATypeInfo(E->getType());
+ EltTBAAInfo.Size =
+ getContext().getTypeSizeInChars(E->getType()).getQuantity();
+ }
} else {
// The base must be a pointer; emit it with an estimate of its alignment.
Addr = EmitPointerWithAlignment(E->getBase(), &EltBaseInfo, &EltTBAAInfo);
diff --git a/clang/test/CodeGen/tbaa-array.cpp b/clang/test/CodeGen/tbaa-array.cpp
index 4a6576e2eeb7f..7cda1dd8d5bf7 100644
--- a/clang/test/CodeGen/tbaa-array.cpp
+++ b/clang/test/CodeGen/tbaa-array.cpp
@@ -1,6 +1,6 @@
-// RUN: %clang_cc1 -triple x86_64-linux -O1 -disable-llvm-passes %s \
+// RUN: %clang_cc1 -triple x86_64-linux -O1 %s \
// RUN: -emit-llvm -o - | FileCheck %s
-// RUN: %clang_cc1 -triple x86_64-linux -O1 -disable-llvm-passes %s \
+// RUN: %clang_cc1 -triple x86_64-linux -O1 %s \
// RUN: -new-struct-path-tbaa -emit-llvm -o - | \
// RUN: FileCheck -check-prefix=CHECK-NEW %s
//
@@ -10,6 +10,8 @@
struct A { int i; };
struct B { A a[1]; };
struct C { int i; int x[3]; };
+struct D { int n; int arr[]; }; // flexible array member
+extern int AA[]; // incomplete array type
int foo(B *b) {
// CHECK-LABEL: _Z3fooP1B
@@ -28,16 +30,30 @@ int bar(C *c) {
int bar2(C *c) {
// CHECK-NEW-LABEL: _Z4bar2P1C
-// CHECK-NEW: load i32, {{.*}}, !tbaa [[TAG_int:!.*]]
+// CHECK-NEW: load i32, {{.*}}, !tbaa [[TAG_C_x:!.*]]
return c->x[2];
}
int bar3(C *c, int j) {
// CHECK-NEW-LABEL: _Z4bar3P1Ci
-// CHECK-NEW: load i32, {{.*}}, !tbaa [[TAG_int:!.*]]
+// CHECK-NEW: load i32, {{.*}}, !tbaa [[TAG_C_x]]
return c->x[j];
}
+int bar4(D *d) {
+// CHECK-NEW-LABEL: _Z4bar4P1D
+// CHECK-NEW: load i32, {{.*}}, !tbaa [[TAG_int:!.*]]
+// CHECK-NEW: load i32, {{.*}}, !tbaa [[TAG_int:!.*]]
+ return d->arr[d->n];
+}
+
+int bar5(int j) {
+// CHECK-NEW-LABEL: _Z4bar5i
+// CHECK-NEW: load i32, {{.*}}, !tbaa [[TAG_int:!.*]]
+// CHECK-NEW: load i32, {{.*}}, !tbaa [[TAG_int:!.*]]
+ return AA[2] + AA[j];
+}
+
// CHECK-DAG: [[TAG_A_i]] = !{[[TYPE_A:!.*]], [[TYPE_int:!.*]], i64 0}
// CHECK-DAG: [[TYPE_A]] = !{!"_ZTS1A", !{{.*}}, i64 0}
// CHECK-DAG: [[TYPE_int]] = !{!"int", !{{.*}}, i64 0}
@@ -45,8 +61,8 @@ int bar3(C *c, int j) {
// CHECK-NEW-DAG: [[TYPE_char:!.*]] = !{{{.*}}, i64 1, !"omnipotent char"}
// CHECK-NEW-DAG: [[TYPE_int:!.*]] = !{[[TYPE_char]], i64 4, !"int"}
// CHECK-NEW-DAG: [[TAG_int]] = !{[[TYPE_int]], [[TYPE_int]], i64 0, i64 4}
-// CHECK-NEW-DAG: [[TYPE_pointer:!.*]] = !{[[TYPE_char]], i64 8, !"any pointer"}
// CHECK-NEW-DAG: [[TYPE_A:!.*]] = !{[[TYPE_char]], i64 4, !"_ZTS1A", [[TYPE_int]], i64 0, i64 4}
// CHECK-NEW-DAG: [[TAG_A_i]] = !{[[TYPE_A]], [[TYPE_int]], i64 0, i64 4}
// CHECK-NEW-DAG: [[TYPE_C:!.*]] = !{[[TYPE_char]], i64 16, !"_ZTS1C", [[TYPE_int]], i64 0, i64 4, [[TYPE_int]], i64 4, i64 12}
// CHECK-NEW-DAG: [[TAG_C_i]] = !{[[TYPE_C:!.*]], [[TYPE_int:!.*]], i64 0, i64 4}
+// CHECK-NEW-DAG: [[TAG_C_x]] = !{[[TYPE_C:!.*]], [[TYPE_int:!.*]], i64 4, i64 4}
|
@llvm/pr-subscribers-clang Author: Bruno De Fraine (brunodf-snps) ChangesThis enables the LLVM optimizer to view accesses to distinct struct
Array members were already added to TBAA struct type nodes in commit Full diff: https://github.com/llvm/llvm-project/pull/137719.diff 2 Files Affected:
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index bba7d1e805f3f..c95a54fcebba9 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -4503,7 +4503,32 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
E->getType(), !getLangOpts().PointerOverflowDefined, SignedIndices,
E->getExprLoc(), &arrayType, E->getBase());
EltBaseInfo = ArrayLV.getBaseInfo();
- EltTBAAInfo = CGM.getTBAAInfoForSubobject(ArrayLV, E->getType());
+ if (!CGM.getCodeGenOpts().NewStructPathTBAA) {
+ // Since CodeGenTBAA::getTypeInfoHelper only handles array types for
+ // new struct path TBAA, we must a use a plain access.
+ EltTBAAInfo = CGM.getTBAAInfoForSubobject(ArrayLV, E->getType());
+ } else if (ArrayLV.getTBAAInfo().isMayAlias()) {
+ EltTBAAInfo = TBAAAccessInfo::getMayAliasInfo();
+ } else if (ArrayLV.getTBAAInfo().isIncomplete()) {
+ EltTBAAInfo = CGM.getTBAAAccessInfo(E->getType());
+ } else {
+ // Extend struct path from base lvalue, similar to EmitLValueForField.
+ // If no base type has been assigned for the array access, then try to
+ // generate one.
+ EltTBAAInfo = ArrayLV.getTBAAInfo();
+ if (!EltTBAAInfo.BaseType) {
+ EltTBAAInfo.BaseType = CGM.getTBAABaseTypeInfo(ArrayLV.getType());
+ assert(!EltTBAAInfo.Offset &&
+ "Nonzero offset for an access with no base type!");
+ }
+ // The index into the array is a runtime value. We use the same struct
+ // path for all array elements (that of the element at index 0). So we
+ // set the access type and size, but do not have to adjust
+ // EltTBAAInfo.Offset.
+ EltTBAAInfo.AccessType = CGM.getTBAATypeInfo(E->getType());
+ EltTBAAInfo.Size =
+ getContext().getTypeSizeInChars(E->getType()).getQuantity();
+ }
} else {
// The base must be a pointer; emit it with an estimate of its alignment.
Addr = EmitPointerWithAlignment(E->getBase(), &EltBaseInfo, &EltTBAAInfo);
diff --git a/clang/test/CodeGen/tbaa-array.cpp b/clang/test/CodeGen/tbaa-array.cpp
index 4a6576e2eeb7f..7cda1dd8d5bf7 100644
--- a/clang/test/CodeGen/tbaa-array.cpp
+++ b/clang/test/CodeGen/tbaa-array.cpp
@@ -1,6 +1,6 @@
-// RUN: %clang_cc1 -triple x86_64-linux -O1 -disable-llvm-passes %s \
+// RUN: %clang_cc1 -triple x86_64-linux -O1 %s \
// RUN: -emit-llvm -o - | FileCheck %s
-// RUN: %clang_cc1 -triple x86_64-linux -O1 -disable-llvm-passes %s \
+// RUN: %clang_cc1 -triple x86_64-linux -O1 %s \
// RUN: -new-struct-path-tbaa -emit-llvm -o - | \
// RUN: FileCheck -check-prefix=CHECK-NEW %s
//
@@ -10,6 +10,8 @@
struct A { int i; };
struct B { A a[1]; };
struct C { int i; int x[3]; };
+struct D { int n; int arr[]; }; // flexible array member
+extern int AA[]; // incomplete array type
int foo(B *b) {
// CHECK-LABEL: _Z3fooP1B
@@ -28,16 +30,30 @@ int bar(C *c) {
int bar2(C *c) {
// CHECK-NEW-LABEL: _Z4bar2P1C
-// CHECK-NEW: load i32, {{.*}}, !tbaa [[TAG_int:!.*]]
+// CHECK-NEW: load i32, {{.*}}, !tbaa [[TAG_C_x:!.*]]
return c->x[2];
}
int bar3(C *c, int j) {
// CHECK-NEW-LABEL: _Z4bar3P1Ci
-// CHECK-NEW: load i32, {{.*}}, !tbaa [[TAG_int:!.*]]
+// CHECK-NEW: load i32, {{.*}}, !tbaa [[TAG_C_x]]
return c->x[j];
}
+int bar4(D *d) {
+// CHECK-NEW-LABEL: _Z4bar4P1D
+// CHECK-NEW: load i32, {{.*}}, !tbaa [[TAG_int:!.*]]
+// CHECK-NEW: load i32, {{.*}}, !tbaa [[TAG_int:!.*]]
+ return d->arr[d->n];
+}
+
+int bar5(int j) {
+// CHECK-NEW-LABEL: _Z4bar5i
+// CHECK-NEW: load i32, {{.*}}, !tbaa [[TAG_int:!.*]]
+// CHECK-NEW: load i32, {{.*}}, !tbaa [[TAG_int:!.*]]
+ return AA[2] + AA[j];
+}
+
// CHECK-DAG: [[TAG_A_i]] = !{[[TYPE_A:!.*]], [[TYPE_int:!.*]], i64 0}
// CHECK-DAG: [[TYPE_A]] = !{!"_ZTS1A", !{{.*}}, i64 0}
// CHECK-DAG: [[TYPE_int]] = !{!"int", !{{.*}}, i64 0}
@@ -45,8 +61,8 @@ int bar3(C *c, int j) {
// CHECK-NEW-DAG: [[TYPE_char:!.*]] = !{{{.*}}, i64 1, !"omnipotent char"}
// CHECK-NEW-DAG: [[TYPE_int:!.*]] = !{[[TYPE_char]], i64 4, !"int"}
// CHECK-NEW-DAG: [[TAG_int]] = !{[[TYPE_int]], [[TYPE_int]], i64 0, i64 4}
-// CHECK-NEW-DAG: [[TYPE_pointer:!.*]] = !{[[TYPE_char]], i64 8, !"any pointer"}
// CHECK-NEW-DAG: [[TYPE_A:!.*]] = !{[[TYPE_char]], i64 4, !"_ZTS1A", [[TYPE_int]], i64 0, i64 4}
// CHECK-NEW-DAG: [[TAG_A_i]] = !{[[TYPE_A]], [[TYPE_int]], i64 0, i64 4}
// CHECK-NEW-DAG: [[TYPE_C:!.*]] = !{[[TYPE_char]], i64 16, !"_ZTS1C", [[TYPE_int]], i64 0, i64 4, [[TYPE_int]], i64 4, i64 12}
// CHECK-NEW-DAG: [[TAG_C_i]] = !{[[TYPE_C:!.*]], [[TYPE_int:!.*]], i64 0, i64 4}
+// CHECK-NEW-DAG: [[TAG_C_x]] = !{[[TYPE_C:!.*]], [[TYPE_int:!.*]], i64 4, i64 4}
|
@kosarev @fhahn @rjmccall @efriedma-quic @hfinkel Note 1: I found the test tbaa-array.cpp was not always matching what it seems because the patterns would match local variable load/stores in the unoptimized LLVM IR. So I started with commit d63c8fe to switch to optimized LLVM IR (this could be precommitted). |
This enables the LLVM optimizer to view accesses to distinct struct
members as independent, also for array members. For example, the
following two stores no longer alias:
Array members were already added to TBAA struct type nodes in commit
57493e2. Here, we extend a path tag for an array subscript expression.