
/*
 * Copyright (C) Igor Sysoev
 */


#if (NGX_SMP)
#define NGX_SMP_LOCK  "lock;"
#else
#define NGX_SMP_LOCK
#endif

/*
 * "cmpxchgl  r, [m]":
 *
 *     if (eax == [m]) {
 *         zf = 1;
 *         [m] = r;
 *     } else {
 *         zf = 0;
 *         eax = [m];
 *     }
 *
 *
 * The "q" is any of the %eax, %ebx, %ecx, or %edx registers.
 * The "=a" and "a" are the %eax register.  Although we can return the result
 * in any register, we use %eax because it is used in cmpxchgl anyway.
 * The "cc" means that flags were changed.
 */

static ngx_inline ngx_atomic_uint_t
ngx_atomic_cmp_set(ngx_atomic_t *lock, ngx_atomic_uint_t old,
    ngx_atomic_uint_t set)
{
    ngx_atomic_uint_t  res;

    __asm__ volatile (

         NGX_SMP_LOCK
    "    cmpxchgl  %3, %1;   "
    "    setz      %b0;      "
    "    movzbl    %b0, %0;  "

    : "=a" (res) : "m" (*lock), "a" (old), "q" (set) : "cc", "memory");

    return res;
}
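

/*
 * A minimal usage sketch of ngx_atomic_cmp_set(), not part of the header
 * proper: a nonblocking "trylock".  The helper name ngx_trylock_sketch()
 * and the convention that *lock == 0 means unlocked are assumptions for
 * illustration only.
 */

static ngx_inline ngx_atomic_uint_t
ngx_trylock_sketch(ngx_atomic_t *lock, ngx_atomic_uint_t id)
{
    /* returns 1 and stores "id" only if the lock was free (== 0) */
    return ngx_atomic_cmp_set(lock, 0, id);
}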


/*
 * "xaddl  r, [m]":
 *
 *     temp = [m];
 *     [m] += r;
 *     r = temp;
 *
 *
 * The "+q" is any of the %eax, %ebx, %ecx, or %edx registers.
 * The "cc" means that flags were changed.
 */


#if !(( __GNUC__ == 2 && __GNUC_MINOR__ <= 7 ) || ( __INTEL_COMPILER >= 800 ))

/*
 * icc 8.1 and 9.0 compile broken code with the -march=pentium4 option:
 * ngx_atomic_fetch_add() always returns the input "add" value,
 * so we use the gcc 2.7 version.
 *
 * icc 8.1 and 9.0 with the -march=pentiumpro option or icc 7.1 compile
 * correct code.
 */

static ngx_inline ngx_atomic_int_t
ngx_atomic_fetch_add(ngx_atomic_t *value, ngx_atomic_int_t add)
{
    __asm__ volatile (

         NGX_SMP_LOCK
    "    xaddl  %0, %1;   "

    : "+q" (add) : "m" (*value) : "cc", "memory");

    return add;
}


#else

/*
 * gcc 2.7 does not support "+q", so we have to use the fixed %eax ("=a" and
 * "a"), and this adds two superfluous instructions at the end of the code,
 * something like this: "mov %eax, %edx / mov %edx, %eax".
 */

static ngx_inline ngx_atomic_int_t
ngx_atomic_fetch_add(ngx_atomic_t *value, ngx_atomic_int_t add)
{
    ngx_atomic_uint_t  old;

    __asm__ volatile (

         NGX_SMP_LOCK
    "    xaddl  %2, %1;   "

    : "=a" (old) : "m" (*value), "a" (add) : "cc", "memory");

    return old;
}

#endif
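

/*
 * A minimal usage sketch of ngx_atomic_fetch_add(), assumed for
 * illustration: handing out unique sequence numbers from a shared
 * counter.  The helper name ngx_next_seq_sketch() is hypothetical.
 */

static ngx_inline ngx_atomic_int_t
ngx_next_seq_sketch(ngx_atomic_t *counter)
{
    /* the pre-increment value is unique per caller even under contention */
    return ngx_atomic_fetch_add(counter, 1);
}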


/*
 * on x86 the write operations go in program order, so we need only
 * to disable the gcc reorder optimizations
 */

#define ngx_memory_barrier()    __asm__ volatile ("" ::: "memory")
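

/*
 * A sketch of where the barrier matters, assuming the hypothetical helper
 * ngx_publish_sketch(): without ngx_memory_barrier() gcc may reorder the
 * two stores, and a reader could observe *ready == 1 before *data has
 * been written.
 */

static ngx_inline void
ngx_publish_sketch(ngx_atomic_t *data, ngx_atomic_t *ready,
    ngx_atomic_uint_t v)
{
    *data = v;
    ngx_memory_barrier();        /* keep the store to *data first */
    *ready = 1;
}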

/* old "as" does not support the "pause" opcode */
#define ngx_cpu_pause()         __asm__ (".byte 0xf3, 0x90")
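

/*
 * A spin-wait sketch tying the primitives together, assumed for
 * illustration (the name ngx_spinlock_sketch() is hypothetical): spin
 * with ngx_cpu_pause() until ngx_atomic_cmp_set() acquires the lock.
 * Production code would also yield to the scheduler after some number
 * of spins.
 */

static ngx_inline void
ngx_spinlock_sketch(ngx_atomic_t *lock, ngx_atomic_uint_t id)
{
    for ( ;; ) {
        /* cheap read first to avoid hammering the bus with locked ops */
        if (*lock == 0 && ngx_atomic_cmp_set(lock, 0, id)) {
            return;
        }

        /* reduce power use and pipeline flushes inside the busy loop */
        ngx_cpu_pause();
    }
}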