comparison src/event/quic/ngx_event_quic_bpf.c @ 8676:7df607cb2d11 quic

QUIC: ngx_quic_bpf module. The QUIC kernel BPF helper inspects the packet payload for the DCID, extracts the key, and routes the packet to the socket matching that key. Thanks to the reuseport feature, each worker owns its own socket, which is identified by the same key that is used to create the DCID. BPF objects are locked in RAM and are subject to RLIMIT_MEMLOCK. The "ulimit -l" command may be used to set up proper limits if map creation fails with EPERM or a map update fails with ETOOLONG.
author Vladimir Homutov <vl@nginx.com>
date Fri, 25 Dec 2020 15:01:15 +0300
parents
children d4e02b3b734f
comparison
equal deleted inserted replaced
8675:d3747ba486e7 8676:7df607cb2d11
1
2 /*
3 * Copyright (C) Nginx, Inc.
4 */
5
6
7 #include <ngx_config.h>
8 #include <ngx_core.h>
9
10
/*
 * Name of the environment variable used to export/import the sockmap
 * descriptors, and the separators used inside its value:
 * "fd#address;fd#address;..."
 */
#define NGX_QUIC_BPF_VARNAME  "NGINX_BPF_MAPS"
#define NGX_QUIC_BPF_VARSEP    ';'
#define NGX_QUIC_BPF_ADDRSEP   '#'


/*
 * Configuration accessors.  Both the macro argument and the whole expansion
 * are parenthesized so that the macros stay correct when used with
 * arbitrary expressions or inside larger expressions.
 */

#define ngx_quic_bpf_get_conf(cycle)                                          \
    ((ngx_quic_bpf_conf_t *) ngx_get_conf((cycle)->conf_ctx,                  \
                                          ngx_quic_bpf_module))

/* yields NULL when the old cycle has no configuration context */
#define ngx_quic_bpf_get_old_conf(cycle)                                      \
    ((cycle)->old_cycle->conf_ctx                                             \
         ? ngx_quic_bpf_get_conf((cycle)->old_cycle)                          \
         : NULL)

#define ngx_core_get_conf(cycle)                                              \
    ((ngx_core_conf_t *) ngx_get_conf((cycle)->conf_ctx, ngx_core_module))
26
27 typedef struct {
28 ngx_queue_t queue;
29 int map_fd;
30
31 struct sockaddr *sockaddr;
32 socklen_t socklen;
33 ngx_uint_t unused; /* unsigned unused:1; */
34 } ngx_quic_sock_group_t;
35
36
37 typedef struct {
38 ngx_flag_t enabled;
39 ngx_uint_t map_size;
40 ngx_queue_t groups; /* of ngx_quic_sock_group_t */
41 } ngx_quic_bpf_conf_t;
42
43
44 static void *ngx_quic_bpf_create_conf(ngx_cycle_t *cycle);
45 static ngx_int_t ngx_quic_bpf_module_init(ngx_cycle_t *cycle);
46
47 static void ngx_quic_bpf_cleanup(void *data);
48 static ngx_inline void ngx_quic_bpf_close(ngx_log_t *log, int fd,
49 const char *name);
50
51 static ngx_quic_sock_group_t *ngx_quic_bpf_find_group(ngx_quic_bpf_conf_t *bcf,
52 ngx_listening_t *ls);
53 static ngx_quic_sock_group_t *ngx_quic_bpf_alloc_group(ngx_cycle_t *cycle,
54 struct sockaddr *sa, socklen_t socklen);
55 static ngx_quic_sock_group_t *ngx_quic_bpf_create_group(ngx_cycle_t *cycle,
56 ngx_listening_t *ls);
57 static ngx_quic_sock_group_t *ngx_quic_bpf_get_group(ngx_cycle_t *cycle,
58 ngx_listening_t *ls);
59 static ngx_int_t ngx_quic_bpf_group_add_socket(ngx_cycle_t *cycle,
60 ngx_listening_t *ls);
61 static uint64_t ngx_quic_bpf_socket_key(ngx_fd_t fd, ngx_log_t *log);
62
63 static ngx_int_t ngx_quic_bpf_export_maps(ngx_cycle_t *cycle);
64 static ngx_int_t ngx_quic_bpf_import_maps(ngx_cycle_t *cycle);
65
66 extern ngx_bpf_program_t ngx_quic_reuseport_helper;
67
68
69 static ngx_command_t ngx_quic_bpf_commands[] = {
70
71 { ngx_string("quic_bpf"),
72 NGX_MAIN_CONF|NGX_DIRECT_CONF|NGX_CONF_FLAG,
73 ngx_conf_set_flag_slot,
74 0,
75 offsetof(ngx_quic_bpf_conf_t, enabled),
76 NULL },
77
78 ngx_null_command
79 };
80
81
82 static ngx_core_module_t ngx_quic_bpf_module_ctx = {
83 ngx_string("quic_bpf"),
84 ngx_quic_bpf_create_conf,
85 NULL
86 };
87
88
89 ngx_module_t ngx_quic_bpf_module = {
90 NGX_MODULE_V1,
91 &ngx_quic_bpf_module_ctx, /* module context */
92 ngx_quic_bpf_commands, /* module directives */
93 NGX_CORE_MODULE, /* module type */
94 NULL, /* init master */
95 ngx_quic_bpf_module_init, /* init module */
96 NULL, /* init process */
97 NULL, /* init thread */
98 NULL, /* exit thread */
99 NULL, /* exit process */
100 NULL, /* exit master */
101 NGX_MODULE_V1_PADDING
102 };
103
104
105 static void *
106 ngx_quic_bpf_create_conf(ngx_cycle_t *cycle)
107 {
108 ngx_quic_bpf_conf_t *bcf;
109
110 bcf = ngx_pcalloc(cycle->pool, sizeof(ngx_quic_bpf_conf_t));
111 if (bcf == NULL) {
112 return NULL;
113 }
114
115 bcf->enabled = NGX_CONF_UNSET;
116 bcf->map_size = NGX_CONF_UNSET_UINT;
117
118 ngx_queue_init(&bcf->groups);
119
120 return bcf;
121 }
122
123
124 static ngx_int_t
125 ngx_quic_bpf_module_init(ngx_cycle_t *cycle)
126 {
127 ngx_uint_t i;
128 ngx_listening_t *ls;
129 ngx_core_conf_t *ccf;
130 ngx_pool_cleanup_t *cln;
131 ngx_quic_bpf_conf_t *bcf;
132
133 ccf = ngx_core_get_conf(cycle);
134 bcf = ngx_quic_bpf_get_conf(cycle);
135
136 ngx_conf_init_value(bcf->enabled, 0);
137
138 bcf->map_size = ccf->worker_processes * 4;
139
140 cln = ngx_pool_cleanup_add(cycle->pool, 0);
141 if (cln == NULL) {
142 goto failed;
143 }
144
145 cln->data = bcf;
146 cln->handler = ngx_quic_bpf_cleanup;
147
148 if (ngx_inherited && ngx_is_init_cycle(cycle->old_cycle)) {
149 if (ngx_quic_bpf_import_maps(cycle) != NGX_OK) {
150 goto failed;
151 }
152 }
153
154 ls = cycle->listening.elts;
155
156 for (i = 0; i < cycle->listening.nelts; i++) {
157 if (ls[i].quic && ls[i].reuseport) {
158 if (ngx_quic_bpf_group_add_socket(cycle, &ls[i]) != NGX_OK) {
159 goto failed;
160 }
161 }
162 }
163
164 if (ngx_quic_bpf_export_maps(cycle) != NGX_OK) {
165 goto failed;
166 }
167
168 return NGX_OK;
169
170 failed:
171
172 if (ngx_is_init_cycle(cycle->old_cycle)) {
173 ngx_log_error(NGX_LOG_EMERG, cycle->log, 0,
174 "ngx_quic_bpf_module failed to initialize, check limits");
175
176 /* refuse to start */
177 return NGX_ERROR;
178 }
179
180 /*
181 * returning error now will lead to master process exiting immediately
182 * leaving worker processes orphaned, what is really unexpected.
183 * Instead, just issue a not about failed initialization and try
184 * to cleanup a bit. Still program can be already loaded to kernel
185 * for some reuseport groups, and there is no way to revert, so
186 * behaviour may be inconsistent.
187 */
188
189 ngx_log_error(NGX_LOG_EMERG, cycle->log, 0,
190 "ngx_quic_bpf_module failed to initialize properly, ignored."
191 "please check limits and note that nginx state now "
192 "can be inconsistent and restart may be required");
193
194 return NGX_OK;
195 }
196
197
198 static void
199 ngx_quic_bpf_cleanup(void *data)
200 {
201 ngx_quic_bpf_conf_t *bcf = (ngx_quic_bpf_conf_t *) data;
202
203 ngx_queue_t *q;
204 ngx_quic_sock_group_t *grp;
205
206 for (q = ngx_queue_head(&bcf->groups);
207 q != ngx_queue_sentinel(&bcf->groups);
208 q = ngx_queue_next(q))
209 {
210 grp = ngx_queue_data(q, ngx_quic_sock_group_t, queue);
211
212 ngx_quic_bpf_close(ngx_cycle->log, grp->map_fd, "map");
213 }
214 }
215
216
217 static ngx_inline void
218 ngx_quic_bpf_close(ngx_log_t *log, int fd, const char *name)
219 {
220 if (close(fd) != -1) {
221 return;
222 }
223
224 ngx_log_error(NGX_LOG_EMERG, log, ngx_errno,
225 "quic bpf close %s fd:%i failed", name, fd);
226 }
227
228
229 static ngx_quic_sock_group_t *
230 ngx_quic_bpf_find_group(ngx_quic_bpf_conf_t *bcf, ngx_listening_t *ls)
231 {
232 ngx_queue_t *q;
233 ngx_quic_sock_group_t *grp;
234
235 for (q = ngx_queue_head(&bcf->groups);
236 q != ngx_queue_sentinel(&bcf->groups);
237 q = ngx_queue_next(q))
238 {
239 grp = ngx_queue_data(q, ngx_quic_sock_group_t, queue);
240
241 if (ngx_cmp_sockaddr(ls->sockaddr, ls->socklen,
242 grp->sockaddr, grp->socklen, 1)
243 == NGX_OK)
244 {
245 return grp;
246 }
247 }
248
249 return NULL;
250 }
251
252
253 static ngx_quic_sock_group_t *
254 ngx_quic_bpf_alloc_group(ngx_cycle_t *cycle, struct sockaddr *sa,
255 socklen_t socklen)
256 {
257 ngx_quic_bpf_conf_t *bcf;
258 ngx_quic_sock_group_t *grp;
259
260 bcf = ngx_quic_bpf_get_conf(cycle);
261
262 grp = ngx_pcalloc(cycle->pool, sizeof(ngx_quic_sock_group_t));
263 if (grp == NULL) {
264 return NULL;
265 }
266
267 grp->socklen = socklen;
268 grp->sockaddr = ngx_palloc(cycle->pool, socklen);
269 if (grp->sockaddr == NULL) {
270 return NULL;
271 }
272 ngx_memcpy(grp->sockaddr, sa, socklen);
273
274 ngx_queue_insert_tail(&bcf->groups, &grp->queue);
275
276 return grp;
277 }
278
279
280 static ngx_quic_sock_group_t *
281 ngx_quic_bpf_create_group(ngx_cycle_t *cycle, ngx_listening_t *ls)
282 {
283 int progfd, failed, flags, rc;
284 ngx_quic_bpf_conf_t *bcf;
285 ngx_quic_sock_group_t *grp;
286
287 bcf = ngx_quic_bpf_get_conf(cycle);
288
289 if (!bcf->enabled) {
290 return NULL;
291 }
292
293 grp = ngx_quic_bpf_alloc_group(cycle, ls->sockaddr, ls->socklen);
294 if (grp == NULL) {
295 return NULL;
296 }
297
298 grp->map_fd = ngx_bpf_map_create(cycle->log, BPF_MAP_TYPE_SOCKHASH,
299 sizeof(uint64_t), sizeof(uint64_t),
300 bcf->map_size, 0);
301 if (grp->map_fd == -1) {
302 goto failed;
303 }
304
305 flags = fcntl(grp->map_fd, F_GETFD);
306 if (flags == -1) {
307 ngx_log_error(NGX_LOG_EMERG, cycle->log, errno,
308 "quic bpf getfd failed");
309 goto failed;
310 }
311
312 /* need to inherit map during binary upgrade after exec */
313 flags &= ~FD_CLOEXEC;
314
315 rc = fcntl(grp->map_fd, F_SETFD, flags);
316 if (rc == -1) {
317 ngx_log_error(NGX_LOG_EMERG, cycle->log, errno,
318 "quic bpf setfd failed");
319 goto failed;
320 }
321
322 ngx_bpf_program_link(&ngx_quic_reuseport_helper,
323 "ngx_quic_sockmap", grp->map_fd);
324
325 progfd = ngx_bpf_load_program(cycle->log, &ngx_quic_reuseport_helper);
326 if (progfd < 0) {
327 goto failed;
328 }
329
330 failed = 0;
331
332 if (setsockopt(ls->fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF,
333 &progfd, sizeof(int))
334 == -1)
335 {
336 ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_socket_errno,
337 "quic bpf setsockopt(SO_ATTACH_REUSEPORT_EBPF) failed");
338 failed = 1;
339 }
340
341 ngx_quic_bpf_close(cycle->log, progfd, "program");
342
343 if (failed) {
344 goto failed;
345 }
346
347 ngx_log_debug1(NGX_LOG_DEBUG_EVENT, cycle->log, 0,
348 "quic bpf sockmap created fd:%i", grp->map_fd);
349 return grp;
350
351 failed:
352
353 if (grp->map_fd != -1) {
354 ngx_quic_bpf_close(cycle->log, grp->map_fd, "map");
355 }
356
357 ngx_queue_remove(&grp->queue);
358
359 return NULL;
360 }
361
362
363 static ngx_quic_sock_group_t *
364 ngx_quic_bpf_get_group(ngx_cycle_t *cycle, ngx_listening_t *ls)
365 {
366 ngx_quic_bpf_conf_t *bcf, *old_bcf;
367 ngx_quic_sock_group_t *grp, *ogrp;
368
369 bcf = ngx_quic_bpf_get_conf(cycle);
370
371 grp = ngx_quic_bpf_find_group(bcf, ls);
372 if (grp) {
373 return grp;
374 }
375
376 old_bcf = ngx_quic_bpf_get_old_conf(cycle);
377
378 if (old_bcf == NULL) {
379 return ngx_quic_bpf_create_group(cycle, ls);
380 }
381
382 ogrp = ngx_quic_bpf_find_group(old_bcf, ls);
383 if (ogrp == NULL) {
384 return ngx_quic_bpf_create_group(cycle, ls);
385 }
386
387 grp = ngx_quic_bpf_alloc_group(cycle, ls->sockaddr, ls->socklen);
388 if (grp == NULL) {
389 return NULL;
390 }
391
392 grp->map_fd = dup(ogrp->map_fd);
393 if (grp->map_fd == -1) {
394 ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno,
395 "quic bpf failed to duplicate bpf map descriptor");
396
397 ngx_queue_remove(&grp->queue);
398
399 return NULL;
400 }
401
402 ngx_log_debug2(NGX_LOG_DEBUG_EVENT, cycle->log, 0,
403 "quic bpf sockmap fd duplicated old:%i new:%i",
404 ogrp->map_fd, grp->map_fd);
405
406 return grp;
407 }
408
409
410 static ngx_int_t
411 ngx_quic_bpf_group_add_socket(ngx_cycle_t *cycle, ngx_listening_t *ls)
412 {
413 uint64_t cookie;
414 ngx_quic_bpf_conf_t *bcf;
415 ngx_quic_sock_group_t *grp;
416
417 bcf = ngx_quic_bpf_get_conf(cycle);
418
419 grp = ngx_quic_bpf_get_group(cycle, ls);
420
421 if (grp == NULL) {
422 if (!bcf->enabled) {
423 return NGX_OK;
424 }
425
426 return NGX_ERROR;
427 }
428
429 grp->unused = 0;
430
431 cookie = ngx_quic_bpf_socket_key(ls->fd, cycle->log);
432 if (cookie == (uint64_t) NGX_ERROR) {
433 return NGX_ERROR;
434 }
435
436 /* map[cookie] = socket; for use in kernel helper */
437 if (ngx_bpf_map_update(grp->map_fd, &cookie, &ls->fd, BPF_ANY) == -1) {
438 ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno,
439 "quic bpf failed to update socket map key=%xL", cookie);
440 return NGX_ERROR;
441 }
442
443 ngx_log_debug4(NGX_LOG_DEBUG_EVENT, cycle->log, 0,
444 "quic bpf sockmap fd:%d add socket:%d cookie:0x%xL worker:%d",
445 grp->map_fd, ls->fd, cookie, ls->worker);
446
447 /* do not inherit this socket */
448 ls->ignore = 1;
449
450 return NGX_OK;
451 }
452
453
454 static uint64_t
455 ngx_quic_bpf_socket_key(ngx_fd_t fd, ngx_log_t *log)
456 {
457 uint64_t cookie;
458 socklen_t optlen;
459
460 optlen = sizeof(cookie);
461
462 if (getsockopt(fd, SOL_SOCKET, SO_COOKIE, &cookie, &optlen) == -1) {
463 ngx_log_error(NGX_LOG_EMERG, log, ngx_socket_errno,
464 "quic bpf getsockopt(SO_COOKIE) failed");
465
466 return (ngx_uint_t) NGX_ERROR;
467 }
468
469 return cookie;
470 }
471
472
473 static ngx_int_t
474 ngx_quic_bpf_export_maps(ngx_cycle_t *cycle)
475 {
476 u_char *p, *buf;
477 size_t len;
478 ngx_str_t *var;
479 ngx_queue_t *q;
480 ngx_core_conf_t *ccf;
481 ngx_quic_bpf_conf_t *bcf;
482 ngx_quic_sock_group_t *grp;
483
484 ccf = ngx_core_get_conf(cycle);
485 bcf = ngx_quic_bpf_get_conf(cycle);
486
487 len = sizeof(NGX_QUIC_BPF_VARNAME) + 1;
488
489 q = ngx_queue_head(&bcf->groups);
490
491 while (q != ngx_queue_sentinel(&bcf->groups)) {
492
493 grp = ngx_queue_data(q, ngx_quic_sock_group_t, queue);
494
495 q = ngx_queue_next(q);
496
497 if (grp->unused) {
498 /*
499 * map was inherited, but it is not used in this configuration;
500 * do not pass such map further and drop the group to prevent
501 * interference with changes during reload
502 */
503
504 ngx_quic_bpf_close(cycle->log, grp->map_fd, "map");
505 ngx_queue_remove(&grp->queue);
506
507 continue;
508 }
509
510 len += NGX_INT32_LEN + 1 + NGX_SOCKADDR_STRLEN + 1;
511 }
512
513 len++;
514
515 buf = ngx_palloc(cycle->pool, len);
516 if (buf == NULL) {
517 return NGX_ERROR;
518 }
519
520 p = ngx_cpymem(buf, NGX_QUIC_BPF_VARNAME "=",
521 sizeof(NGX_QUIC_BPF_VARNAME));
522
523 for (q = ngx_queue_head(&bcf->groups);
524 q != ngx_queue_sentinel(&bcf->groups);
525 q = ngx_queue_next(q))
526 {
527 grp = ngx_queue_data(q, ngx_quic_sock_group_t, queue);
528
529 p = ngx_sprintf(p, "%ud", grp->map_fd);
530
531 *p++ = NGX_QUIC_BPF_ADDRSEP;
532
533 p += ngx_sock_ntop(grp->sockaddr, grp->socklen, p,
534 NGX_SOCKADDR_STRLEN, 1);
535
536 *p++ = NGX_QUIC_BPF_VARSEP;
537 }
538
539 *p = '\0';
540
541 var = ngx_array_push(&ccf->env);
542 if (var == NULL) {
543 return NGX_ERROR;
544 }
545
546 var->data = buf;
547 var->len = sizeof(NGX_QUIC_BPF_VARNAME) - 1;
548
549 return NGX_OK;
550 }
551
552
553 static ngx_int_t
554 ngx_quic_bpf_import_maps(ngx_cycle_t *cycle)
555 {
556 int s;
557 u_char *inherited, *p, *v;
558 ngx_uint_t in_fd;
559 ngx_addr_t tmp;
560 ngx_quic_bpf_conf_t *bcf;
561 ngx_quic_sock_group_t *grp;
562
563 inherited = (u_char *) getenv(NGX_QUIC_BPF_VARNAME);
564
565 if (inherited == NULL) {
566 return NGX_OK;
567 }
568
569 bcf = ngx_quic_bpf_get_conf(cycle);
570
571 #if (NGX_SUPPRESS_WARN)
572 s = -1;
573 #endif
574
575 in_fd = 1;
576
577 for (p = inherited, v = p; *p; p++) {
578
579 switch (*p) {
580
581 case NGX_QUIC_BPF_ADDRSEP:
582
583 if (!in_fd) {
584 ngx_log_error(NGX_LOG_EMERG, cycle->log, 0,
585 "quic bpf failed to parse inherited env");
586 return NGX_ERROR;
587 }
588 in_fd = 0;
589
590 s = ngx_atoi(v, p - v);
591 if (s == NGX_ERROR) {
592 ngx_log_error(NGX_LOG_EMERG, cycle->log, 0,
593 "quic bpf failed to parse inherited map fd");
594 return NGX_ERROR;
595 }
596
597 v = p + 1;
598 break;
599
600 case NGX_QUIC_BPF_VARSEP:
601
602 if (in_fd) {
603 ngx_log_error(NGX_LOG_EMERG, cycle->log, 0,
604 "quic bpf failed to parse inherited env");
605 return NGX_ERROR;
606 }
607 in_fd = 1;
608
609 grp = ngx_pcalloc(cycle->pool,
610 sizeof(ngx_quic_sock_group_t));
611 if (grp == NULL) {
612 return NGX_ERROR;
613 }
614
615 grp->map_fd = s;
616
617 if (ngx_parse_addr_port(cycle->pool, &tmp, v, p - v)
618 != NGX_OK)
619 {
620 ngx_log_error(NGX_LOG_EMERG, cycle->log, 0,
621 "quic bpf failed to parse inherited"
622 " address '%*s'", p - v , v);
623
624 ngx_quic_bpf_close(cycle->log, s, "inherited map");
625
626 return NGX_ERROR;
627 }
628
629 grp->sockaddr = tmp.sockaddr;
630 grp->socklen = tmp.socklen;
631
632 grp->unused = 1;
633
634 ngx_queue_insert_tail(&bcf->groups, &grp->queue);
635
636 ngx_log_debug3(NGX_LOG_DEBUG_EVENT, cycle->log, 0,
637 "quic bpf sockmap inherited with "
638 "fd:%i address:%*s",
639 grp->map_fd, p - v, v);
640 v = p + 1;
641 break;
642
643 default:
644 break;
645 }
646 }
647
648 return NGX_OK;
649 }