This is the mail archive of the
libc-alpha@sources.redhat.com
mailing list for the glibc project.
PATCH: More _dl_hwcap and SSE fixes (Re: The usage of _dl_hwcap and dl_hwcap_mask is confusing)
- From: "H. J. Lu" <hjl at lucon dot org>
- To: Ulrich Drepper <drepper at redhat dot com>
- Cc: GNU C Library <libc-alpha at sources dot redhat dot com>
- Date: Tue, 22 Jul 2003 09:57:28 -0700
- Subject: PATCH: More _dl_hwcap and SSE fixes (Re: The usage of _dl_hwcap and dl_hwcap_mask is confusing)
- References: <20030618175132.GA29771@lucon.org> <3F1C81EB.1090702@redhat.com>
On Mon, Jul 21, 2003 at 05:14:35PM -0700, Ulrich Drepper wrote:
> -----BEGIN PGP SIGNED MESSAGE-----
> Hash: SHA1
>
> I have changed this code now. I meant in fact to use dl_hwcap and not
> dl_hwcap_mask. If the hardware is available it needs to be handled.
> Regardless of whether other DSOs (or libc DSO itself) are loaded for the
> appropriate subdir.
>
Thanks for looking into it. There are still a few issues remaining:
1. _dl_hwcap is not available in libc.a, so a static link such as
"gcc -static ... -lm" may fail.
2. Should we set/clear MXCSR if the CPU supports SSE?
3. Should we set MXCSR during process initialization?
This patch adds _dl_hwcap to libc.a, sets/clears MXCSR when the CPU
supports SSE, and sets MXCSR during process initialization.
H.J.
----
2003-07-22 H.J. Lu <hongjiu.lu@intel.com>
* elf/dl-support.c (_dl_hwcap): New.
(_dl_aux_init): Initialize GL(dl_hwcap) for static binaries.
* sysdeps/i386/fpu/fclrexcpt.c: Include <unistd.h>, <ldsodefs.h>
and <dl-procinfo.h>.
(__feclearexcept): Clear MXCSR if needed.
* sysdeps/i386/fpu/fsetexcptflg.c: Likewise.
* sysdeps/i386/fpu_control.h (_FPU_GETCW): Commented out.
(_FPU_SETCW): Likewise.
* sysdeps/i386/setfpucw.c: New file. Support SSE and SSE2.
--- libc/elf/dl-support.c.p4 2003-06-28 16:10:26.000000000 -0700
+++ libc/elf/dl-support.c 2003-07-22 09:00:51.000000000 -0700
@@ -123,6 +123,7 @@ int _dl_correct_cache_id = _DL_CACHE_DEF
struct ElfW(Phdr) *_dl_phdr;
size_t _dl_phnum;
+unsigned long int _dl_hwcap;
#ifdef NEED_DL_SYSINFO
/* Needed for improved syscall handling on at least x86/Linux. */
@@ -167,6 +168,9 @@ _dl_aux_init (ElfW(auxv_t) *av)
case AT_PHNUM:
GL(dl_phnum) = av->a_un.a_val;
break;
+ case AT_HWCAP:
+ GL(dl_hwcap) = av->a_un.a_val;
+ break;
#ifdef NEED_DL_SYSINFO
case AT_SYSINFO:
GL(dl_sysinfo) = av->a_un.a_val;
--- libc/sysdeps/i386/fpu/fclrexcpt.c.p4 2001-07-05 21:55:53.000000000 -0700
+++ libc/sysdeps/i386/fpu/fclrexcpt.c 2003-07-22 09:00:51.000000000 -0700
@@ -19,6 +19,9 @@
02111-1307 USA. */
#include <fenv.h>
+#include <unistd.h>
+#include <ldsodefs.h>
+#include <dl-procinfo.h>
int
__feclearexcept (int excepts)
@@ -38,6 +41,21 @@ __feclearexcept (int excepts)
/* Put the new data in effect. */
__asm__ ("fldenv %0" : : "m" (*&temp));
+ /* If the CPU supports SSE, we clear the MXCSR as well. */
+ if ((GL(dl_hwcap) & HWCAP_I386_XMM) != 0)
+ {
+ unsigned int xnew_exc;
+
+ /* Get the current MXCSR. */
+ __asm__ ("stmxcsr %0" : "=m" (*&xnew_exc));
+
+ /* Clear the relevant bits. */
+ xnew_exc &= excepts ^ FE_ALL_EXCEPT;
+
+ /* Put the new data in effect. */
+ __asm__ ("ldmxcsr %0" : : "m" (*&xnew_exc));
+ }
+
/* Success. */
return 0;
}
--- libc/sysdeps/i386/fpu/fsetexcptflg.c.p4 2001-07-05 21:55:53.000000000 -0700
+++ libc/sysdeps/i386/fpu/fsetexcptflg.c 2003-07-22 09:00:51.000000000 -0700
@@ -21,6 +21,9 @@
#include <fenv.h>
#include <math.h>
#include <bp-sym.h>
+#include <unistd.h>
+#include <ldsodefs.h>
+#include <dl-procinfo.h>
int
__fesetexceptflag (const fexcept_t *flagp, int excepts)
@@ -39,6 +42,22 @@ __fesetexceptflag (const fexcept_t *flag
the next floating-point instruction. */
__asm__ ("fldenv %0" : : "m" (*&temp));
+ /* If the CPU supports SSE, we set the MXCSR as well. */
+ if ((GL(dl_hwcap) & HWCAP_I386_XMM) != 0)
+ {
+ unsigned int xnew_exc;
+
+ /* Get the current MXCSR. */
+ __asm__ ("stmxcsr %0" : "=m" (*&xnew_exc));
+
+ /* Set the relevant bits. */
+ xnew_exc &= ~(excepts & FE_ALL_EXCEPT);
+ xnew_exc |= *flagp & excepts & FE_ALL_EXCEPT;
+
+ /* Put the new data in effect. */
+ __asm__ ("ldmxcsr %0" : : "m" (*&xnew_exc));
+ }
+
/* Success. */
return 0;
}
--- libc/sysdeps/i386/fpu_control.h.p4 2001-07-05 21:55:52.000000000 -0700
+++ libc/sysdeps/i386/fpu_control.h 2003-07-22 09:00:51.000000000 -0700
@@ -88,9 +88,12 @@
/* Type of the control word. */
typedef unsigned int fpu_control_t __attribute__ ((__mode__ (__HI__)));
+#if 0
+/* Need to set MXCSR for SSE/SSE2. */
/* Macros for accessing the hardware control word. */
#define _FPU_GETCW(cw) __asm__ ("fnstcw %0" : "=m" (*&cw))
#define _FPU_SETCW(cw) __asm__ ("fldcw %0" : : "m" (*&cw))
+#endif
/* Default control word set at startup. */
extern fpu_control_t __fpu_control;
--- libc/sysdeps/i386/setfpucw.c.p4 2003-07-22 09:00:51.000000000 -0700
+++ libc/sysdeps/i386/setfpucw.c 2003-07-22 09:00:51.000000000 -0700
@@ -0,0 +1,55 @@
+/* Set the FPU control word for x86.
+ Copyright (C) 2003 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <math.h>
+#include <fpu_control.h>
+#include <fenv.h>
+#include <unistd.h>
+#include <ldsodefs.h>
+#include <dl-procinfo.h>
+
+void
+__setfpucw (fpu_control_t set)
+{
+ fpu_control_t cw;
+
+ /* Fetch the current control word. */
+ __asm__ ("fnstcw %0" : "=m" (*&cw));
+
+ /* Preserve the reserved bits, and set the rest as the user
+ specified (or the default, if the user gave zero). */
+ cw &= _FPU_RESERVED;
+ cw |= set & ~_FPU_RESERVED;
+
+ __asm__ ("fldcw %0" : : "m" (*&cw));
+
+ /* If the CPU supports SSE, we set the MXCSR as well. */
+ if ((GL(dl_hwcap) & HWCAP_I386_XMM) != 0)
+ {
+ unsigned int xnew_exc;
+
+ /* Get the current MXCSR. */
+ __asm__ ("stmxcsr %0" : "=m" (*&xnew_exc));
+
+ xnew_exc &= ~((0xc00 << 3) | (FE_ALL_EXCEPT << 7));
+ xnew_exc |= ((set & 0xc00) << 3) | ((set & FE_ALL_EXCEPT) << 7);
+
+ __asm__ ("ldmxcsr %0" : : "m" (*&xnew_exc));
+ }
+}