0
|
1 /* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
|
2 /* $Id: memcached.h 570 2007-06-20 01:21:45Z plindner $ */
|
|
3
|
|
4 #include "config.h"
|
|
5 #include <sys/types.h>
|
|
6 #include <sys/socket.h>
|
|
7 #include <sys/time.h>
|
|
8 #include <netinet/in.h>
|
|
9 #include <event.h>
|
|
10
|
|
/* Buffer and UDP packet size limits. */
#define DATA_BUFFER_SIZE 2048
#define UDP_READ_BUFFER_SIZE 65536
#define UDP_MAX_PAYLOAD_SIZE 1400
#define UDP_HEADER_SIZE 8
#define MAX_SENDBUF_SIZE (256 * 1024 * 1024)
|
|
16
|
|
/* Initial size of list of items being returned by "get". */
#define ITEM_LIST_INITIAL 200

/* Initial size of the sendmsg() scatter/gather array. */
#define IOV_LIST_INITIAL 400

/* Initial number of sendmsg() argument structures to allocate. */
#define MSG_LIST_INITIAL 10

/* High water marks for buffer shrinking */
#define READ_BUFFER_HIGHWAT 8192
#define ITEM_LIST_HIGHWAT 400
#define IOV_LIST_HIGHWAT 600
#define MSG_LIST_HIGHWAT 100
|
|
31
|
|
/* Get a consistent bool type */
#if HAVE_STDBOOL_H
# include <stdbool.h>
#else
  typedef enum {false = 0, true = 1} bool;
#endif

/*
 * Fixed-width integer types.
 * NOTE(review): the fallback only supplies uint8_t, but struct stats in this
 * header also uses uint64_t -- confirm a definition exists on platforms
 * without <stdint.h>.
 */
#if HAVE_STDINT_H
# include <stdint.h>
#else
  typedef unsigned char             uint8_t;
#endif

/* unistd.h is here */
#if HAVE_UNISTD_H
# include <unistd.h>
#endif
|
|
49
|
|
/* Time relative to server start. Smaller than time_t on 64-bit systems. */
typedef unsigned int rel_time_t;
|
|
52
|
|
/* Layout of the global `stats` object declared extern later in this header. */
struct stats {
    unsigned int  curr_items;
    unsigned int  total_items;
    uint64_t      curr_bytes;
    unsigned int  curr_conns;
    unsigned int  total_conns;
    unsigned int  conn_structs;
    uint64_t      get_cmds;
    uint64_t      set_cmds;
    uint64_t      get_hits;
    uint64_t      get_misses;
    uint64_t      evictions;
    time_t        started;          /* when the process was started */
    uint64_t      bytes_read;
    uint64_t      bytes_written;
};
|
|
69
|
|
#define MAX_VERBOSITY_LEVEL 2
|
|
71
|
|
72 struct settings {
|
|
73 size_t maxbytes;
|
|
74 int maxconns;
|
|
75 int port;
|
|
76 int udpport;
|
|
77 struct in_addr interf;
|
|
78 int verbose;
|
|
79 rel_time_t oldest_live; /* ignore existing items older than this */
|
|
80 bool managed; /* if 1, a tracker manages virtual buckets */
|
|
81 int evict_to_free;
|
|
82 char *socketpath; /* path to unix socket if using local socket */
|
|
83 double factor; /* chunk size growth factor */
|
|
84 int chunk_size;
|
|
85 int num_threads; /* number of libevent threads to run */
|
|
86 char prefix_delimiter; /* character that marks a key prefix (for stats) */
|
|
87 int detail_enabled; /* nonzero if we're collecting detailed stats */
|
|
88 };
|
|
89
|
|
/* Global instances; this header only declares them. */
extern struct stats stats;
extern struct settings settings;
|
|
92
|
|
/* Flag values for the it_flags field of an item. */
#define ITEM_LINKED 1
#define ITEM_DELETED 2

/* temp */
#define ITEM_SLABBED 4
|
|
98
|
|
99 typedef struct _stritem {
|
|
100 struct _stritem *next;
|
|
101 struct _stritem *prev;
|
|
102 struct _stritem *h_next; /* hash chain next */
|
|
103 rel_time_t time; /* least recent access */
|
|
104 rel_time_t exptime; /* expire time */
|
|
105 int nbytes; /* size of data */
|
|
106 unsigned short refcount;
|
|
107 uint8_t nsuffix; /* length of flags-and-length string */
|
|
108 uint8_t it_flags; /* ITEM_* above */
|
|
109 uint8_t slabs_clsid;/* which slab class we're in */
|
|
110 uint8_t nkey; /* key length, w/terminating null and padding */
|
|
111 void * end[];
|
|
112 /* then null-terminated key */
|
|
113 /* then " flags length\r\n" (no terminating null) */
|
|
114 /* then data with terminating \r\n (no terminating null; it's binary!) */
|
|
115 } item;
|
|
116
|
|
/* Address of the key, which starts at the item's trailing end[] member. */
#define ITEM_key(item) ((char*)&((item)->end[0]))

/*
 * warning: don't use the macros below with a function call (or any
 * expression with side effects) as the argument, because they evaluate
 * their argument more than once.
 *
 * ITEM_suffix: address nkey + 1 bytes past the start of end[].
 * ITEM_data:   address nsuffix bytes past ITEM_suffix.
 * ITEM_ntotal: sizeof(struct _stritem) + nkey + 1 + nsuffix + nbytes.
 */
#define ITEM_suffix(item) ((char*) &((item)->end[0]) + (item)->nkey + 1)
#define ITEM_data(item) ((char*) &((item)->end[0]) + (item)->nkey + 1 + (item)->nsuffix)
#define ITEM_ntotal(item) (sizeof(struct _stritem) + (item)->nkey + 1 + (item)->nsuffix + (item)->nbytes)
|
|
123
|
|
/* Connection states. */
enum conn_states {
    conn_listening,  /* the socket which listens for connections */
    conn_read,       /* reading in a command line */
    conn_write,      /* writing out a simple response */
    conn_nread,      /* reading in a fixed number of bytes */
    conn_swallow,    /* swallowing unnecessary bytes w/o storing */
    conn_closing,    /* closing this connection */
    conn_mwrite      /* writing out many items sequentially */
};
|
|
133
|
|
/* Command codes for stores (see conn.item_comm and the store_item "comm" argument). */
#define NREAD_ADD 1
#define NREAD_SET 2
#define NREAD_REPLACE 3
#define NREAD_APPEND 4
#define NREAD_PREPEND 5
|
0
|
139
|
|
140 typedef struct {
|
|
141 int sfd;
|
|
142 int state;
|
|
143 struct event event;
|
|
144 short ev_flags;
|
|
145 short which; /* which events were just triggered */
|
|
146
|
|
147 char *rbuf; /* buffer to read commands into */
|
|
148 char *rcurr; /* but if we parsed some already, this is where we stopped */
|
|
149 int rsize; /* total allocated size of rbuf */
|
|
150 int rbytes; /* how much data, starting from rcur, do we have unparsed */
|
|
151
|
|
152 char *wbuf;
|
|
153 char *wcurr;
|
|
154 int wsize;
|
|
155 int wbytes;
|
|
156 int write_and_go; /* which state to go into after finishing current write */
|
|
157 void *write_and_free; /* free this memory after finishing writing */
|
|
158
|
|
159 char *ritem; /* when we read in an item's value, it goes here */
|
|
160 int rlbytes;
|
|
161
|
|
162 /* data for the nread state */
|
|
163
|
|
164 /*
|
|
165 * item is used to hold an item structure created after reading the command
|
|
166 * line of set/add/replace commands, but before we finished reading the actual
|
|
167 * data. The data is read into ITEM_data(item) to avoid extra copying.
|
|
168 */
|
|
169
|
|
170 void *item; /* for commands set/add/replace */
|
|
171 int item_comm; /* which one is it: set/add/replace */
|
|
172
|
|
173 /* data for the swallow state */
|
|
174 int sbytes; /* how many bytes to swallow */
|
|
175
|
|
176 /* data for the mwrite state */
|
|
177 struct iovec *iov;
|
|
178 int iovsize; /* number of elements allocated in iov[] */
|
|
179 int iovused; /* number of elements used in iov[] */
|
|
180
|
|
181 struct msghdr *msglist;
|
|
182 int msgsize; /* number of elements allocated in msglist[] */
|
|
183 int msgused; /* number of elements used in msglist[] */
|
|
184 int msgcurr; /* element in msglist[] being transmitted now */
|
|
185 int msgbytes; /* number of bytes in current msg */
|
|
186
|
|
187 item **ilist; /* list of items to write out */
|
|
188 int isize;
|
|
189 item **icurr;
|
|
190 int ileft;
|
|
191
|
|
192 /* data for UDP clients */
|
|
193 bool udp; /* is this is a UDP "connection" */
|
|
194 int request_id; /* Incoming UDP request ID, if this is a UDP "connection" */
|
|
195 struct sockaddr request_addr; /* Who sent the most recent request */
|
|
196 socklen_t request_addr_size;
|
|
197 unsigned char *hdrbuf; /* udp packet headers */
|
|
198 int hdrsize; /* number of headers' worth of space is allocated */
|
|
199
|
|
200 int binary; /* are we in binary mode */
|
|
201 int bucket; /* bucket number for the next command, if running as
|
|
202 a managed instance. -1 (_not_ 0) means invalid. */
|
|
203 int gen; /* generation requested for the bucket */
|
|
204 } conn;
|
|
205
|
|
/* number of virtual buckets for a managed instance */
#define MAX_BUCKETS 32768
|
|
208
|
|
209 /* current time of day (updated periodically) */
|
|
210 extern volatile rel_time_t current_time;
|
|
211
|
|
212 /* temporary hack */
|
|
213 /* #define assert(x) if(!(x)) { printf("assert failure: %s\n", #x); pre_gdb(); }
|
|
214 void pre_gdb (); */
|
|
215
|
|
216 /*
|
|
217 * Functions
|
|
218 */
|
|
219
|
|
220 conn *do_conn_from_freelist();
|
|
221 int do_conn_add_to_freelist(conn *c);
|
|
222 char *do_defer_delete(item *item, time_t exptime);
|
|
223 void do_run_deferred_deletes(void);
|
|
224 char *do_add_delta(item *item, int incr, const unsigned int delta, char *buf);
|
|
225 int do_store_item(item *item, int comm);
|
|
226 conn *conn_new(const int sfd, const int init_state, const int event_flags, const int read_buffer_size, const bool is_udp, struct event_base *base);
|
|
227
|
|
228
|
|
229 #include "stats.h"
|
|
230 #include "slabs.h"
|
|
231 #include "assoc.h"
|
|
232 #include "items.h"
|
|
233
|
|
234
|
|
/*
 * In multithreaded mode, we wrap certain functions with lock management and
 * replace the logic of some other functions. All wrapped functions have
 * "mt_" and "do_" variants. In multithreaded mode, the plain version of a
 * function is #define-d to the "mt_" variant, which often just grabs a
 * lock and calls the "do_" function. In singlethreaded mode, the "do_"
 * function is called directly.
 *
 * Functions such as the libevent-related calls that need to do cross-thread
 * communication in multithreaded mode (rather than actually doing the work
 * in the current thread) are called via "dispatch_" frontends, which are
 * also #define-d to directly call the underlying code in singlethreaded mode.
 */
#ifdef USE_THREADS

void thread_init(int nthreads, struct event_base *main_base);
int  dispatch_event_add(int thread, conn *c);
void dispatch_conn_new(int sfd, int init_state, int event_flags, int read_buffer_size, int is_udp);

/* Lock wrappers for cache functions that are called from main loop. */
char *mt_add_delta(item *item, const int incr, const unsigned int delta, char *buf);
void mt_assoc_move_next_bucket(void);
conn *mt_conn_from_freelist(void);
int   mt_conn_add_to_freelist(conn *c);
char *mt_defer_delete(item *it, time_t exptime);
int   mt_is_listen_thread(void);
item *mt_item_alloc(char *key, size_t nkey, int flags, rel_time_t exptime, int nbytes);
void  mt_item_flush_expired(void);
item *mt_item_get_notedeleted(const char *key, const size_t nkey, bool *delete_locked);
int   mt_item_link(item *it);
void  mt_item_remove(item *it);
int   mt_item_replace(item *it, item *new_it);
void  mt_item_unlink(item *it);
void  mt_item_update(item *it);
void  mt_run_deferred_deletes(void);
void *mt_slabs_alloc(size_t size);
void  mt_slabs_free(void *ptr, size_t size);
int   mt_slabs_reassign(unsigned char srcid, unsigned char dstid);
char *mt_slabs_stats(int *buflen);
void  mt_stats_lock(void);
void  mt_stats_unlock(void);
int   mt_store_item(item *item, int comm);


# define add_delta(x,y,z,a)          mt_add_delta(x,y,z,a)
# define assoc_move_next_bucket()    mt_assoc_move_next_bucket()
# define conn_from_freelist()        mt_conn_from_freelist()
# define conn_add_to_freelist(x)     mt_conn_add_to_freelist(x)
# define defer_delete(x,y)           mt_defer_delete(x,y)
# define is_listen_thread()          mt_is_listen_thread()
# define item_alloc(x,y,z,a,b)       mt_item_alloc(x,y,z,a,b)
# define item_flush_expired()        mt_item_flush_expired()
# define item_get_notedeleted(x,y,z) mt_item_get_notedeleted(x,y,z)
# define item_link(x)                mt_item_link(x)
# define item_remove(x)              mt_item_remove(x)
# define item_replace(x,y)           mt_item_replace(x,y)
# define item_update(x)              mt_item_update(x)
# define item_unlink(x)              mt_item_unlink(x)
# define run_deferred_deletes()      mt_run_deferred_deletes()
# define slabs_alloc(x)              mt_slabs_alloc(x)
# define slabs_free(x,y)             mt_slabs_free(x,y)
# define slabs_reassign(x,y)         mt_slabs_reassign(x,y)
# define slabs_stats(x)              mt_slabs_stats(x)
# define store_item(x,y)             mt_store_item(x,y)

# define STATS_LOCK()                mt_stats_lock()
# define STATS_UNLOCK()              mt_stats_unlock()

#else /* !USE_THREADS */

# define add_delta(x,y,z,a)          do_add_delta(x,y,z,a)
# define assoc_move_next_bucket()    do_assoc_move_next_bucket()
# define conn_from_freelist()        do_conn_from_freelist()
# define conn_add_to_freelist(x)     do_conn_add_to_freelist(x)
# define defer_delete(x,y)           do_defer_delete(x,y)
# define dispatch_conn_new(x,y,z,a,b) conn_new(x,y,z,a,b,main_base)
# define dispatch_event_add(t,c)     event_add(&(c)->event, 0)
# define is_listen_thread()          1
# define item_alloc(x,y,z,a,b)       do_item_alloc(x,y,z,a,b)
# define item_flush_expired()        do_item_flush_expired()
# define item_get_notedeleted(x,y,z) do_item_get_notedeleted(x,y,z)
# define item_link(x)                do_item_link(x)
# define item_remove(x)              do_item_remove(x)
# define item_replace(x,y)           do_item_replace(x,y)
# define item_unlink(x)              do_item_unlink(x)
# define item_update(x)              do_item_update(x)
# define run_deferred_deletes()      do_run_deferred_deletes()
# define slabs_alloc(x)              do_slabs_alloc(x)
# define slabs_free(x,y)             do_slabs_free(x,y)
# define slabs_reassign(x,y)         do_slabs_reassign(x,y)
# define slabs_stats(x)              do_slabs_stats(x)
# define store_item(x,y)             do_store_item(x,y)
# define thread_init(x,y)            0

# define STATS_LOCK()                /**/
# define STATS_UNLOCK()              /**/

#endif /* !USE_THREADS */
|
|
333
|
|
334
|