
/*
 * Copyright (C) Igor Sysoev
 */


#if (NGX_SMP)
#define NGX_SMP_LOCK  "lock;"
#else
#define NGX_SMP_LOCK
#endif
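

/*
 * Illustrative note (not in the original file): NGX_SMP_LOCK is pasted
 * into the asm templates below, so on SMP builds an instruction becomes,
 * for example, "lock; cmpxchgl %3, %1", which locks the bus or cache line
 * and makes the read-modify-write atomic across CPUs; uniprocessor builds
 * omit the prefix and avoid its cost.
 */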


/*
 * "cmpxchgl  r, [m]":
 *
 *     if (eax == [m]) {
 *         zf = 1;
 *         [m] = r;
 *     } else {
 *         zf = 0;
 *         eax = [m];
 *     }
 *
 *
 * The "r" means a general register.
 * The "=a" and "a" are the %eax register.
 * Although we can return the result in any register, we use "a" because it
 * is used in cmpxchgl anyway.  The result is actually in %al, not in %eax;
 * however, as the code is inlined, gcc can test %al as well as %eax,
 * and icc adds "movzbl %al, %eax" by itself.
 *
 * The "cc" means that the flags were changed.
 */

static ngx_inline ngx_atomic_uint_t
ngx_atomic_cmp_set(ngx_atomic_t *lock, ngx_atomic_uint_t old,
    ngx_atomic_uint_t set)
{
    u_char  res;

    __asm__ volatile (

         NGX_SMP_LOCK
    "    cmpxchgl  %3, %1;   "
    "    sete      %0;       "

    : "=a" (res) : "m" (*lock), "a" (old), "r" (set) : "cc", "memory");

    return res;
}
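

/*
 * Usage sketch (illustrative, not part of the original file): a try-lock
 * built on ngx_atomic_cmp_set(); the "lock" variable and the locked
 * value 1 are assumptions made for this example.
 *
 *     static ngx_atomic_t  lock;
 *
 *     if (ngx_atomic_cmp_set(&lock, 0, 1)) {
 *         ngx_memory_barrier();
 *         lock = 0;
 *     }
 *
 * If the compare-and-set succeeds, the caller owns the lock; the barrier
 * followed by a plain store releases it, relying on x86 store ordering.
 */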


/*
 * "xaddl  r, [m]":
 *
 *     temp = [m];
 *     [m] += r;
 *     r = temp;
 *
 *
 * The "+r" means a general register that is both read and written.
 * The "cc" means that the flags were changed.
 */


#if !(( __GNUC__ == 2 && __GNUC_MINOR__ <= 7 ) || ( __INTEL_COMPILER >= 800 ))

/*
 * icc 8.1 and 9.0 compile broken code with the -march=pentium4 option:
 * ngx_atomic_fetch_add() always returns the input "add" value,
 * so we use the gcc 2.7 version for them.
 *
 * icc 8.1 and 9.0 with the -march=pentiumpro option, or icc 7.1, compile
 * correct code.
 */

static ngx_inline ngx_atomic_int_t
ngx_atomic_fetch_add(ngx_atomic_t *value, ngx_atomic_int_t add)
{
    __asm__ volatile (

         NGX_SMP_LOCK
    "    xaddl  %0, %1;   "

    : "+r" (add) : "m" (*value) : "cc", "memory");

    return add;
}


#else

/*
 * gcc 2.7 does not support "+r", so we have to use the fixed
 * %eax ("=a" and "a"), and this adds two superfluous instructions at the
 * end of the code, something like "mov %eax, %edx / mov %edx, %eax".
 */

static ngx_inline ngx_atomic_int_t
ngx_atomic_fetch_add(ngx_atomic_t *value, ngx_atomic_int_t add)
{
    ngx_atomic_uint_t  old;

    __asm__ volatile (

         NGX_SMP_LOCK
    "    xaddl  %2, %1;   "

    : "=a" (old) : "m" (*value), "a" (add) : "cc", "memory");

    return old;
}

#endif
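

/*
 * Usage sketch (illustrative, not part of the original file): bumping a
 * shared counter; "connections" is an assumed variable name.
 *
 *     static ngx_atomic_t  connections;
 *
 *     ngx_atomic_int_t  old = ngx_atomic_fetch_add(&connections, 1);
 *
 * "old" receives the value the counter held before the increment.
 */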


/*
 * on x86 the write operations are performed in program order, so we need
 * only to disable the gcc reordering optimizations
 */

#define ngx_memory_barrier()    __asm__ volatile ("" ::: "memory")
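
/*
 * Usage sketch (illustrative, not part of the original file): publishing
 * data through a flag; "data" and "ready" are assumed shared variables.
 * The barrier stops gcc from moving the "ready" store ahead of the "data"
 * store; the x86 hardware already keeps stores in program order.
 *
 *     data = 123;
 *     ngx_memory_barrier();
 *     ready = 1;
 */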

/* old versions of as do not support the "pause" opcode, so emit its bytes */
#define ngx_cpu_pause()         __asm__ (".byte 0xf3, 0x90")
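

/*
 * Usage sketch (illustrative, not part of the original file): a spin-wait
 * loop combining the primitives above; "lock" is an assumed variable.
 * ngx_cpu_pause() hints to the CPU that this is a spin loop, which saves
 * power and reduces pipeline flushes when the loop exits.
 *
 *     static ngx_atomic_t  lock;
 *
 *     while (!ngx_atomic_cmp_set(&lock, 0, 1)) {
 *         ngx_cpu_pause();
 *     }
 */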
|