# HG changeset patch # User Igor Sysoev # Date 1168531658 0 # Node ID ee5f21acea763acd48fee8064e09a1c3f30f02fc # Parent 51d0884364fe0b8d75690157acc145702b4c065b optimization diff --git a/src/os/unix/ngx_gcc_atomic_amd64.h b/src/os/unix/ngx_gcc_atomic_amd64.h --- a/src/os/unix/ngx_gcc_atomic_amd64.h +++ b/src/os/unix/ngx_gcc_atomic_amd64.h @@ -24,8 +24,11 @@ * * * The "r" is any register, %rax (%r0) - %r16. - * The "=a" and "a" are the %rax register. Although we can return result - * in any register, we use %rax because it is used in cmpxchgq anyway. + * The "=a" and "a" are the %rax register. + * Although we can return result in any register, we use "a" because it is + * used in cmpxchgq anyway. The result is actually in %al but not in $rax, + * however as the code is inlined gcc can test %al as well as %rax. + * * The "cc" means that flags were changed. */ @@ -33,14 +36,13 @@ static ngx_inline ngx_atomic_uint_t ngx_atomic_cmp_set(ngx_atomic_t *lock, ngx_atomic_uint_t old, ngx_atomic_uint_t set) { - ngx_atomic_uint_t res; + u_char res; __asm__ volatile ( NGX_SMP_LOCK " cmpxchgq %3, %1; " - " setz %b0; " - " movzbq %b0, %0; " + " sete %0; " : "=a" (res) : "m" (*lock), "a" (old), "r" (set) : "cc", "memory"); diff --git a/src/os/unix/ngx_gcc_atomic_x86.h b/src/os/unix/ngx_gcc_atomic_x86.h --- a/src/os/unix/ngx_gcc_atomic_x86.h +++ b/src/os/unix/ngx_gcc_atomic_x86.h @@ -23,9 +23,13 @@ * } * * - * The "q" is any of the %eax, %ebx, %ecx, or %edx registers. - * The "=a" and "a" are the %eax register. Although we can return result - * in any register, we use %eax because it is used in cmpxchgl anyway. + * The "r" means the general register. + * The "=a" and "a" are the %eax register. + * Although we can return result in any register, we use "a" because it is + * used in cmpxchgl anyway. The result is actually in %al but not in %eax, + * however, as the code is inlined gcc can test %al as well as %eax, + * and icc adds "movzbl %al, %eax" by itself. + * * The "cc" means that flags were changed. */ @@ -33,16 +37,15 @@ static ngx_inline ngx_atomic_uint_t ngx_atomic_cmp_set(ngx_atomic_t *lock, ngx_atomic_uint_t old, ngx_atomic_uint_t set) { - ngx_atomic_uint_t res; + u_char res; __asm__ volatile ( NGX_SMP_LOCK " cmpxchgl %3, %1; " - " setz %b0; " - " movzbl %b0, %0; " + " sete %0; " - : "=a" (res) : "m" (*lock), "a" (old), "q" (set) : "cc", "memory"); + : "=a" (res) : "m" (*lock), "a" (old), "r" (set) : "cc", "memory"); return res; } @@ -56,7 +59,7 @@ ngx_atomic_cmp_set(ngx_atomic_t *lock, n * r = temp; * * - * The "+q" is any of the %eax, %ebx, %ecx, or %edx registers. + * The "+r" means the general register. * The "cc" means that flags were changed. */ @@ -80,7 +83,7 @@ ngx_atomic_fetch_add(ngx_atomic_t *value NGX_SMP_LOCK " xaddl %0, %1; " - : "+q" (add) : "m" (*value) : "cc", "memory"); + : "+r" (add) : "m" (*value) : "cc", "memory"); return add; } @@ -89,9 +92,9 @@ ngx_atomic_fetch_add(ngx_atomic_t *value #else /* - * gcc 2.7 does not support "+q", so we have to use the fixed %eax ("=a" and - * "a") and this adds two superfluous instructions in the end of code, - * something like this: "mov %eax, %edx / mov %edx, %eax". + * gcc 2.7 does not support "+r", so we have to use the fixed + * %eax ("=a" and "a") and this adds two superfluous instructions in the end + * of code, something like this: "mov %eax, %edx / mov %edx, %eax". */ static ngx_inline ngx_atomic_int_t