[Cryptech-Commits] [sw/stm32] 01/03: Add loop unrolling to bring the profilable mem* functions closer to newlib, because memset is called a LOT in the course of RSA signing, and we need to understand how much time we're actually spending there.
git at cryptech.is
git at cryptech.is
Mon Dec 3 22:35:38 UTC 2018
This is an automated email from the git hooks/post-receive script.
paul at psgd.org pushed a commit to branch master
in repository sw/stm32.
commit 97034edb35e92361daaa24512989d00f6c3fd517
Author: Paul Selkirk <paul at psgd.org>
AuthorDate: Mon Nov 26 17:26:55 2018 -0500
Add loop unrolling to bring the profilable mem* functions closer to
newlib, because memset is called a LOT in the course of RSA signing, and
we need to understand how much time we're actually spending there.
---
libraries/libprof/Makefile | 6 +++-
memfunc.c => libraries/libprof/memfunc.c | 52 ++++++++++++++++++++++++--------
projects/hsm/Makefile | 1 -
3 files changed, 44 insertions(+), 15 deletions(-)
diff --git a/libraries/libprof/Makefile b/libraries/libprof/Makefile
index 4fe5fb4..37b9a23 100644
--- a/libraries/libprof/Makefile
+++ b/libraries/libprof/Makefile
@@ -1,12 +1,16 @@
LIB = libprof.a
-OBJS = gmon.o profil.o profiler.o
+OBJS = gmon.o profil.o profiler.o memfunc.o
# Don't profile the profiling code, because that way lies madness (and recursion).
CFLAGS := $(subst -pg,,$(CFLAGS))
all: $(LIB)
+# But do profile the mem functions
+memfunc.o: memfunc.c
+ $(CC) $(CFLAGS) -pg -c -o $@ $<
+
%.o : %.c
$(CC) $(CFLAGS) -c -o $@ $<
diff --git a/memfunc.c b/libraries/libprof/memfunc.c
similarity index 52%
rename from memfunc.c
rename to libraries/libprof/memfunc.c
index fd94b28..fc908e1 100644
--- a/memfunc.c
+++ b/libraries/libprof/memfunc.c
@@ -4,9 +4,15 @@
/*
* Profilable substitutes for mem*(), lacking libc_p.a
*
- * This code was written with reference to newlib, but does not copy every
- * quirk and loop-unrolling optimization from newlib. Its only purpose is
- * to let us figure out who is calling memcpy 2 million times.
+ * This code was written with reference to newlib, and was recently
+ * brought closer into line with newlib, to make profiling more accurate.
+ *
+ * Newlib is maintained by Cygwin, which is Red Hat. There is no copyright
+ * statement in the corresponding newlib source files, nor is there a
+ * COPYING file in newlib/libc/string or newlib/libc. Consider this file
+ * to be covered under one or more of the 50 copyright notices in
+ * newlib/COPYING, most of which are BSD. In any case, this file is only
+ * used for profiling, and is not used in production builds.
*/
#define is_word_aligned(x) (((size_t)(x) & 3) == 0)
@@ -16,12 +22,19 @@ void *memcpy(void *dst, const void *src, size_t n)
uint8_t *d8 = (uint8_t *)dst;
uint8_t *s8 = (uint8_t *)src;
- if (n >= 4 && is_word_aligned(src) && is_word_aligned(dst)) {
+ if (n >= sizeof(uint32_t) && is_word_aligned(src) && is_word_aligned(dst)) {
uint32_t *d32 = (uint32_t *)dst;
uint32_t *s32 = (uint32_t *)src;
- while (n >= 4) {
+ while (n >= 4 * sizeof(uint32_t)) {
+ *d32++ = *s32++;
+ *d32++ = *s32++;
+ *d32++ = *s32++;
*d32++ = *s32++;
- n -= 4;
+ n -= 4 * sizeof(uint32_t);
+ }
+ while (n >= sizeof(uint32_t)) {
+ *d32++ = *s32++;
+ n -= sizeof(uint32_t);
}
d8 = (uint8_t *)d32;
s8 = (uint8_t *)s32;
@@ -38,12 +51,25 @@ void *memset(void *dst, int c, size_t n)
uint8_t *d8 = (uint8_t *)dst;
uint8_t c8 = (uint8_t)c;
- if (n >= 4 && is_word_aligned(dst)) {
- uint32_t *d32 = (uint32_t *)dst;
+ while (!is_word_aligned(d8)) {
+ if (n--)
+ *d8++ = c8;
+ else
+ return dst;
+ }
+ if (n >= sizeof(uint32_t)) {
+ uint32_t *d32 = (uint32_t *)d8;
uint32_t c32 = (c8 << 24) | (c8 << 16) | (c8 << 8) | (c8);
- while (n >= 4) {
+ while (n >= 4 * sizeof(uint32_t)) {
+ *d32++ = c32;
+ *d32++ = c32;
+ *d32++ = c32;
+ *d32++ = c32;
+ n -= 4 * sizeof(uint32_t);
+ }
+ while (n >= sizeof(uint32_t)) {
*d32++ = c32;
- n -= 4;
+ n -= sizeof(uint32_t);
}
d8 = (uint8_t *)d32;
}
@@ -59,15 +85,15 @@ int memcmp(const void *dst, const void *src, size_t n)
uint8_t *d8 = (uint8_t *)dst;
uint8_t *s8 = (uint8_t *)src;
- if (n >= 4 && is_word_aligned(src) && is_word_aligned(dst)) {
+ if (n >= sizeof(uint32_t) && is_word_aligned(src) && is_word_aligned(dst)) {
uint32_t *d32 = (uint32_t *)dst;
uint32_t *s32 = (uint32_t *)src;
- while (n >= 4) {
+ while (n >= sizeof(uint32_t)) {
if (*d32 != *s32)
break;
d32++;
s32++;
- n -= 4;
+ n -= sizeof(uint32_t);
}
d8 = (uint8_t *)d32;
s8 = (uint8_t *)s32;
diff --git a/projects/hsm/Makefile b/projects/hsm/Makefile
index 3430e14..37c552d 100644
--- a/projects/hsm/Makefile
+++ b/projects/hsm/Makefile
@@ -25,7 +25,6 @@ LDFLAGS += -mfloat-abi=hard -mfpu=fpv4-sp-d16
LDFLAGS += -Wl,--gc-sections
ifdef DO_PROFILING
-OBJS += $(TOPLEVEL)/memfunc.o
LDFLAGS += --specs=rdimon.specs -lc -lrdimon
endif
More information about the Commits mailing list