-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[compiler-rt][AArch64] Provide basic implementations of SME memcpy/memmove in case of strictly aligned memory access #138250
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,8 @@ | ||
#include <stddef.h> | ||
|
||
/* The asm version uses FP registers. Use this on targets without them */ | ||
#if __ARM_FP == 0 | ||
// The asm version uses FP registers and unaligned memory accesses. Use this on | ||
// targets without them. | ||
#if __ARM_FP == 0 || !defined(__ARM_FEATURE_UNALIGNED) | ||
void *__arm_sc_memset(void *dest, int c, size_t n) __arm_streaming_compatible { | ||
unsigned char *destp = (unsigned char *)dest; | ||
unsigned char c8 = (unsigned char)c; | ||
|
@@ -22,3 +23,46 @@ const void *__arm_sc_memchr(const void *src, int c, | |
|
||
return NULL; | ||
} | ||
|
||
#ifndef __ARM_FEATURE_UNALIGNED | ||
|
||
static void *memcpy_fwd(void *dest, const void *src, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is it still worth adding the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These internal functions are now marked as There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK yeah I see. If it's ok with you, I do think it's better to have the same naming scheme for all the functions and start with |
||
size_t n) __arm_streaming_compatible { | ||
unsigned char *destp = (unsigned char *)dest; | ||
const unsigned char *srcp = (const unsigned char *)src; | ||
|
||
for (size_t i = 0; i < n; ++i) | ||
destp[i] = srcp[i]; | ||
return dest; | ||
} | ||
|
||
static void *memcpy_rev(void *dest, const void *src, | ||
size_t n) __arm_streaming_compatible { | ||
unsigned char *destp = (unsigned char *)dest; | ||
const unsigned char *srcp = (const unsigned char *)src; | ||
|
||
while (n > 0) { | ||
--n; | ||
destp[n] = srcp[n]; | ||
} | ||
return dest; | ||
} | ||
|
||
void *__arm_sc_memcpy(void *__restrict dest, const void *__restrict src, | ||
size_t n) __arm_streaming_compatible { | ||
return memcpy_fwd(dest, src, n); | ||
} | ||
|
||
void *__arm_sc_memmove(void *dest, const void *src, | ||
size_t n) __arm_streaming_compatible { | ||
unsigned char *destp = (unsigned char *)dest; | ||
const unsigned char *srcp = (const unsigned char *)src; | ||
|
||
if ((srcp > (destp + n)) || (destp > (srcp + n))) | ||
return __arm_sc_memcpy(dest, src, n); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I assume you're calling There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes. From some experimentation, the compiler does take advantage of it: https://godbolt.org/z/oj4b97KK9 |
||
if (srcp > destp) | ||
return memcpy_fwd(dest, src, n); | ||
return memcpy_rev(dest, src, n); | ||
} | ||
|
||
#endif /* !defined(__ARM_FEATURE_UNALIGNED) */ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
To avoid creating many different variants and various ifdefs, does it make sense to choose the implementation (either
sme-libc-mem-routines.S
(optimised) orsme-libc-routines.c
(basic)) in CMakeLists.txt ? That way we can also let cmake emit a warning that the unoptimised SME routines are chosen, because of the constraints given by the compilation flags to compiler-rt.