changeset 1009:ee5f21acea76

optimization
author Igor Sysoev <igor@sysoev.ru>
date Thu, 11 Jan 2007 16:07:38 +0000
parents 51d0884364fe
children d1792e17a559
files src/os/unix/ngx_gcc_atomic_amd64.h src/os/unix/ngx_gcc_atomic_x86.h
diffstat 2 files changed, 22 insertions(+), 17 deletions(-) [+]
line wrap: on
line diff
--- a/src/os/unix/ngx_gcc_atomic_amd64.h
+++ b/src/os/unix/ngx_gcc_atomic_amd64.h
@@ -24,8 +24,11 @@
  *
  *
  * The "r" is any register, %rax (%r0) - %r16.
- * The "=a" and "a" are the %rax register.  Although we can return result
- * in any register, we use %rax because it is used in cmpxchgq anyway.
+ * The "=a" and "a" are the %rax register.
+ * Although we can return result in any register, we use "a" because it is
+ * used in cmpxchgq anyway.  The result is actually in %al but not in %rax,
+ * however, as the code is inlined gcc can test %al as well as %rax.
+ *
  * The "cc" means that flags were changed.
  */
 
@@ -33,14 +36,13 @@ static ngx_inline ngx_atomic_uint_t
 ngx_atomic_cmp_set(ngx_atomic_t *lock, ngx_atomic_uint_t old,
     ngx_atomic_uint_t set)
 {
-    ngx_atomic_uint_t  res;
+    u_char  res;
 
     __asm__ volatile (
 
          NGX_SMP_LOCK
     "    cmpxchgq  %3, %1;   "
-    "    setz      %b0;      "
-    "    movzbq    %b0, %0;  "
+    "    sete      %0;       "
 
     : "=a" (res) : "m" (*lock), "a" (old), "r" (set) : "cc", "memory");
 
--- a/src/os/unix/ngx_gcc_atomic_x86.h
+++ b/src/os/unix/ngx_gcc_atomic_x86.h
@@ -23,9 +23,13 @@
  *     }
  *
  *
- * The "q" is any of the %eax, %ebx, %ecx, or %edx registers.
- * The "=a" and "a" are the %eax register.  Although we can return result
- * in any register, we use %eax because it is used in cmpxchgl anyway.
+ * The "r" means any general register.
+ * The "=a" and "a" are the %eax register.
+ * Although we can return result in any register, we use "a" because it is
+ * used in cmpxchgl anyway.  The result is actually in %al but not in %eax,
+ * however, as the code is inlined gcc can test %al as well as %eax,
+ * and icc adds "movzbl %al, %eax" by itself.
+ *
  * The "cc" means that flags were changed.
  */
 
@@ -33,16 +37,15 @@ static ngx_inline ngx_atomic_uint_t
 ngx_atomic_cmp_set(ngx_atomic_t *lock, ngx_atomic_uint_t old,
     ngx_atomic_uint_t set)
 {
-    ngx_atomic_uint_t  res;
+    u_char  res;
 
     __asm__ volatile (
 
          NGX_SMP_LOCK
     "    cmpxchgl  %3, %1;   "
-    "    setz      %b0;      "
-    "    movzbl    %b0, %0;  "
+    "    sete      %0;       "
 
-    : "=a" (res) : "m" (*lock), "a" (old), "q" (set) : "cc", "memory");
+    : "=a" (res) : "m" (*lock), "a" (old), "r" (set) : "cc", "memory");
 
     return res;
 }
@@ -56,7 +59,7 @@ ngx_atomic_cmp_set(ngx_atomic_t *lock, n
  *     r = temp;
  *
  *
- * The "+q" is any of the %eax, %ebx, %ecx, or %edx registers.
+ * The "+r" means any general register.
  * The "cc" means that flags were changed.
  */
 
@@ -80,7 +83,7 @@ ngx_atomic_fetch_add(ngx_atomic_t *value
          NGX_SMP_LOCK
     "    xaddl  %0, %1;   "
 
-    : "+q" (add) : "m" (*value) : "cc", "memory");
+    : "+r" (add) : "m" (*value) : "cc", "memory");
 
     return add;
 }
@@ -89,9 +92,9 @@ ngx_atomic_fetch_add(ngx_atomic_t *value
 #else
 
 /*
- * gcc 2.7 does not support "+q", so we have to use the fixed %eax ("=a" and
- * "a") and this adds two superfluous instructions in the end of code,
- * something like this: "mov %eax, %edx / mov %edx, %eax".
+ * gcc 2.7 does not support "+r", so we have to use the fixed
+ * %eax ("=a" and "a") and this adds two superfluous instructions in the end
+ * of code, something like this: "mov %eax, %edx / mov %edx, %eax".
  */
 
 static ngx_inline ngx_atomic_int_t