This is the mail archive of the libc-hacker@sources.redhat.com mailing list for the glibc project.
Note that libc-hacker is a closed list. You may look at the archives of this list, but subscription and posting are not open.
| Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
|---|---|---|
| Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |
| Other format: | [Raw text] | |
Hi!
I was surprised by the code generated for foo = stpcpy(foo, "./") on ia64,
so I reran the stringops benchmark I posted here some months ago on
i686/ia64/alpha/sparc/sparc64.
On all !_STRING_ARCH_unaligned platforms (ia64/alpha/sparc*), mempcpy, strcpy and
stpcpy were all way slower when "optimized" by the headers, and the generated code was
much larger. Here are e.g. the ia64 results. Legend as before: times are in ticks;
gcc is without the bits/string*.h opts; nob is likewise, but with -fno-builtin too;
str is with -D__USE_STRING_INLINES; str2 is with just the string2.h opts (without any options);
* means the time without any bits/string*.h opts is faster than or as fast as the others:
gcc nob str str2
mempcpy
(0,a,1) 45 45 280 276 *
(0,a,2) 48 48 292 290 *
(0,ab,3) 51 51 276 276 *
(0,abc,4) 54 54 299 299 *
(0,abcd,5) 57 57 308 311 *
(0,abcde,6) 60 60 324 332 *
(0,abcdef,7) 63 63 305 306 *
(0,abcdefg,8) 66 66 322 322 *
(0,quitelongstring,0) 34 35 155 155 *
(0,quitelongstring,1) 50 45 287 283 *
(0,quitelongstring,2) 56 48 295 296 *
(0,quitelongstring,3) 63 51 302 303 *
(0,quitelongstring,4) 70 54 303 299 *
(0,quitelongstring,5) 77 57 305 302 *
(0,quitelongstring,6) 84 60 307 302 *
(0,quitelongstring,7) 91 63 307 310 *
(0,quitelongstring,8) 98 66 317 322 *
(5,a,1) 48 48 274 277 *
(5,a,2) 51 51 304 291 *
(5,ab,3) 54 54 275 276 *
(5,abc,4) 57 57 303 306 *
(5,abcd,5) 60 60 306 315 *
(5,abcde,6) 63 63 328 328 *
(5,abcdef,7) 66 66 304 304 *
(5,abcdefg,8) 69 69 323 320 *
(5,quitelongstring,0) 37 37 160 153 *
(5,quitelongstring,1) 52 48 285 287 *
(5,quitelongstring,2) 59 51 299 298 *
(5,quitelongstring,3) 66 54 302 296 *
(5,quitelongstring,4) 73 57 303 299 *
(5,quitelongstring,5) 80 60 304 306 *
(5,quitelongstring,6) 87 63 303 301 *
(5,quitelongstring,7) 94 67 313 306 *
(5,quitelongstring,8) 101 69 320 320 *
stpcpy
(0,NUL) 33 34 286 290 *
(0,a) 40 40 280 280 *
(0,ab) 47 47 301 312 *
(0,abc) 54 54 336 338 *
(0,abcd) 61 61 315 323 *
(0,abcde) 122 72 312 323 *
(0,abcdef) 138 75 305 312 *
(0,abcdefg) 154 82 325 326 *
(5,NUL) 36 36 288 291 *
(5,a) 43 43 277 285 *
(5,ab) 50 58 306 317 *
(5,abc) 57 57 338 336 *
(5,abcd) 64 64 319 323 *
(5,abcde) 125 71 302 313 *
(5,abcdef) 145 78 309 316 *
(5,abcdefg) 157 85 319 320 *
strcpy
(0,NUL) 4 51 300 290 *
(0,a) 14 54 274 292 *
(0,ab) 5 66 308 308 *
(0,abc) 4 69 330 338 *
(0,abcd) 5 72 310 335 *
(0,abcde) 5 75 314 318 *
(0,abcdef) 7 78 309 313 *
(0,abcdefg) 4 81 322 331 *
(5,NUL) 6 49 288 285 *
(5,a) 7 53 279 277 *
(5,ab) 8 59 304 317 *
(5,abc) 9 90 330 335 *
(5,abcd) 10 93 302 319 *
(5,abcde) 11 96 302 305 *
(5,abcdef) 12 99 307 315 *
(5,abcdefg) 13 111 319 321 *
The resulting tester binary was 553221 bytes.
With the patch below:
mempcpy
(0,a,1) 45 45 14 14
(0,a,2) 48 48 4 4
(0,ab,3) 51 51 7 7
(0,abc,4) 54 54 4 4
(0,abcd,5) 57 57 7 7
(0,abcde,6) 60 60 7 7
(0,abcdef,7) 63 63 7 7
(0,abcdefg,8) 66 66 4 4
(0,quitelongstring,0) 35 35 1 1
(0,quitelongstring,1) 45 45 4 4
(0,quitelongstring,2) 49 48 14 14
(0,quitelongstring,3) 51 51 7 7
(0,quitelongstring,4) 54 54 4 4
(0,quitelongstring,5) 57 57 7 7
(0,quitelongstring,6) 60 60 7 7
(0,quitelongstring,7) 63 63 7 7
(0,quitelongstring,8) 66 66 4 4
(5,a,1) 48 48 6 6
(5,a,2) 51 51 7 7
(5,ab,3) 54 54 8 8
(5,abc,4) 57 57 9 9
(5,abcd,5) 60 60 10 10
(5,abcde,6) 63 63 11 11
(5,abcdef,7) 66 66 12 12
(5,abcdefg,8) 69 69 13 13
(5,quitelongstring,0) 37 37 4 4
(5,quitelongstring,1) 48 48 6 6
(5,quitelongstring,2) 51 51 7 7
(5,quitelongstring,3) 54 54 8 8
(5,quitelongstring,4) 57 57 9 9
(5,quitelongstring,5) 60 60 10 10
(5,quitelongstring,6) 63 63 11 11
(5,quitelongstring,7) 66 66 12 12
(5,quitelongstring,8) 69 69 13 13
stpcpy
(0,NUL) 33 33 14 14
(0,a) 40 40 4 4
(0,ab) 47 47 5 5
(0,abc) 54 54 4 4
(0,abcd) 61 61 5 5
(0,abcde) 68 68 7 11
(0,abcdef) 75 75 5 5
(0,abcdefg) 82 82 4 4
(5,NUL) 36 36 6 6
(5,a) 43 43 7 7
(5,ab) 50 50 8 8
(5,abc) 57 57 9 9
(5,abcd) 64 64 10 10
(5,abcde) 71 71 11 11
(5,abcdef) 78 78 12 12
(5,abcdefg) 85 85 13 13
strcpy
(0,NUL) 4 55 14 14 *
(0,a) 14 58 4 4
(0,ab) 5 70 5 5 *
(0,abc) 8 80 4 4
(0,abcd) 5 76 5 5 *
(0,abcde) 5 79 5 5 *
(0,abcdef) 7 82 7 7 *
(0,abcdefg) 4 85 4 4 *
(5,NUL) 6 53 6 6 *
(5,a) 7 57 7 7 *
(5,ab) 8 63 8 8 *
(5,abc) 9 94 9 9 *
(5,abcd) 10 97 10 10 *
(5,abcde) 11 100 11 11 *
(5,abcdef) 12 103 12 12 *
(5,abcdefg) 13 115 13 13 *
and the resulting tester binary was 325285 bytes long (i.e. ~230KB smaller).
Similar results on alpha or sparc.
On i686, it usually generates exactly the same code, and sometimes code that is just a bit faster.
But the important thing is that the compiler can know more than the header can.
Not to mention it speeds up compilation...
2002-01-23 Jakub Jelinek <jakub@redhat.com>
* string/bits/string2.h (__mempcpy): For gcc 3.0+, don't use
__mempcpy_small but instead use __builtin_memcpy (dest, src, n) + n for
short lengths and constant src.
(strcpy): Don't optimize for gcc 3.0+.
(__stpcpy): For gcc 3.0+, don't use
__stpcpy_small but instead use __builtin_strcpy (dest, src) + strlen (src)
for short string literal src.
--- libc/string/bits/string2.h.jj Wed Nov 21 13:32:37 2001
+++ libc/string/bits/string2.h Wed Jan 23 20:09:05 2002
@@ -1,5 +1,5 @@
/* Machine-independant string function optimizations.
- Copyright (C) 1997, 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
+ Copyright (C) 1997,1998,1999,2000,2001,2002 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
@@ -198,26 +198,35 @@ __STRING2_COPY_TYPE (8);
#ifdef __USE_GNU
# if !defined _HAVE_STRING_ARCH_mempcpy || defined _FORCE_INLINES
# ifndef _HAVE_STRING_ARCH_mempcpy
-# define __mempcpy(dest, src, n) \
+# if __GNUC_PREREQ (3, 0)
+# define __mempcpy(dest, src, n) \
+ (__extension__ (__builtin_constant_p (src) && __builtin_constant_p (n) \
+ && __string2_1bptr_p (src) && n <= 8 \
+ ? __builtin_memcpy (dest, src, n) + n \
+ : __mempcpy (dest, src, n)))
+# else
+# define __mempcpy(dest, src, n) \
(__extension__ (__builtin_constant_p (src) && __builtin_constant_p (n) \
&& __string2_1bptr_p (src) && n <= 8 \
? __mempcpy_small (dest, __mempcpy_args (src), n) \
: __mempcpy (dest, src, n)))
+# endif
/* In glibc we use this function frequently but for namespace reasons
we have to use the name `__mempcpy'. */
# define mempcpy(dest, src, n) __mempcpy (dest, src, n)
# endif
-# if _STRING_ARCH_unaligned
-# ifndef _FORCE_INLINES
-# define __mempcpy_args(src) \
+# if !__GNUC_PREREQ (3, 0) || defined _FORCE_INLINES
+# if _STRING_ARCH_unaligned
+# ifndef _FORCE_INLINES
+# define __mempcpy_args(src) \
((__const char *) (src))[0], ((__const char *) (src))[2], \
((__const char *) (src))[4], ((__const char *) (src))[6], \
__extension__ __STRING2_SMALL_GET16 (src, 0), \
__extension__ __STRING2_SMALL_GET16 (src, 4), \
__extension__ __STRING2_SMALL_GET32 (src, 0), \
__extension__ __STRING2_SMALL_GET32 (src, 4)
-# endif
+# endif
__STRING_INLINE void *__mempcpy_small (void *, char, char, char, char,
__uint16_t, __uint16_t, __uint32_t,
__uint32_t, size_t);
@@ -283,9 +292,9 @@ __mempcpy_small (void *__dest1,
}
return (void *) __u;
}
-# else
-# ifndef _FORCE_INLINES
-# define __mempcpy_args(src) \
+# else
+# ifndef _FORCE_INLINES
+# define __mempcpy_args(src) \
((__const char *) (src))[0], \
__extension__ ((__STRING2_COPY_ARR2) \
{ { ((__const char *) (src))[0], ((__const char *) (src))[1] } }), \
@@ -313,7 +322,7 @@ __mempcpy_small (void *__dest1,
((__const char *) (src))[2], ((__const char *) (src))[3], \
((__const char *) (src))[4], ((__const char *) (src))[5], \
((__const char *) (src))[6], ((__const char *) (src))[7] } })
-# endif
+# endif
__STRING_INLINE void *__mempcpy_small (void *, char, __STRING2_COPY_ARR2,
__STRING2_COPY_ARR3,
__STRING2_COPY_ARR4,
@@ -367,6 +376,7 @@ __mempcpy_small (void *__dest, char __sr
}
return __extension__ ((void *) __u + __srclen);
}
+# endif
# endif
# endif
#endif
@@ -383,8 +393,9 @@ extern void *__rawmemchr (const void *__
/* Copy SRC to DEST. */
-#if !defined _HAVE_STRING_ARCH_strcpy || defined _FORCE_INLINES
-# ifndef _HAVE_STRING_ARCH_strcpy
+#if (!defined _HAVE_STRING_ARCH_strcpy && !__GNUC_PREREQ (3, 0)) \
+ || defined _FORCE_INLINES
+# if !defined _HAVE_STRING_ARCH_strcpy && !__GNUC_PREREQ (3, 0)
# define strcpy(dest, src) \
(__extension__ (__builtin_constant_p (src) \
? (__string2_1bptr_p (src) && strlen (src) + 1 <= 8 \
@@ -547,26 +558,38 @@ __strcpy_small (char *__dest,
#ifdef __USE_GNU
# if !defined _HAVE_STRING_ARCH_stpcpy || defined _FORCE_INLINES
# ifndef _HAVE_STRING_ARCH_stpcpy
-# define __stpcpy(dest, src) \
+# if __GNUC_PREREQ (3, 0)
+# define __stpcpy(dest, src) \
+ (__extension__ (__builtin_constant_p (src) \
+ ? (__string2_1bptr_p (src) && strlen (src) + 1 <= 8 \
+ ? __builtin_strcpy (dest, src) + strlen (src) \
+ : ((char *) (__mempcpy) (dest, src, strlen (src) + 1) \
+ - 1)) \
+ : __stpcpy (dest, src)))
+# else
+# define __stpcpy(dest, src) \
(__extension__ (__builtin_constant_p (src) \
? (__string2_1bptr_p (src) && strlen (src) + 1 <= 8 \
? __stpcpy_small (dest, __stpcpy_args (src), \
strlen (src) + 1) \
- : ((char *) __mempcpy (dest, src, strlen (src) + 1) - 1))\
+ : ((char *) (__mempcpy) (dest, src, strlen (src) + 1) \
+ - 1)) \
: __stpcpy (dest, src)))
+# endif
/* In glibc we use this function frequently but for namespace reasons
we have to use the name `__stpcpy'. */
# define stpcpy(dest, src) __stpcpy (dest, src)
# endif
-# if _STRING_ARCH_unaligned
-# ifndef _FORCE_INLINES
-# define __stpcpy_args(src) \
+# if !__GNUC_PREREQ (3, 0) || defined _FORCE_INLINES
+# if _STRING_ARCH_unaligned
+# ifndef _FORCE_INLINES
+# define __stpcpy_args(src) \
__extension__ __STRING2_SMALL_GET16 (src, 0), \
__extension__ __STRING2_SMALL_GET16 (src, 4), \
__extension__ __STRING2_SMALL_GET32 (src, 0), \
__extension__ __STRING2_SMALL_GET32 (src, 4)
-# endif
+# endif
__STRING_INLINE char *__stpcpy_small (char *, __uint16_t, __uint16_t,
__uint32_t, __uint32_t, size_t);
__STRING_INLINE char *
@@ -626,9 +649,9 @@ __stpcpy_small (char *__dest,
}
return &__u->__c;
}
-# else
-# ifndef _FORCE_INLINES
-# define __stpcpy_args(src) \
+# else
+# ifndef _FORCE_INLINES
+# define __stpcpy_args(src) \
__extension__ ((__STRING2_COPY_ARR2) \
{ { ((__const char *) (src))[0], '\0' } }), \
__extension__ ((__STRING2_COPY_ARR3) \
@@ -655,7 +678,7 @@ __stpcpy_small (char *__dest,
((__const char *) (src))[2], ((__const char *) (src))[3], \
((__const char *) (src))[4], ((__const char *) (src))[5], \
((__const char *) (src))[6], '\0' } })
-# endif
+# endif
__STRING_INLINE char *__stpcpy_small (char *, __STRING2_COPY_ARR2,
__STRING2_COPY_ARR3,
__STRING2_COPY_ARR4,
@@ -709,6 +732,7 @@ __stpcpy_small (char *__dest,
}
return __dest + __srclen - 1;
}
+# endif
# endif
# endif
#endif
Jakub
| Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
|---|---|---|
| Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |