8676
|
1 #include <errno.h>
|
|
2 #include <linux/string.h>
|
|
3 #include <linux/udp.h>
|
|
4 #include <linux/bpf.h>
|
|
/*
 * bpf_helpers.h is not part of the linux-headers package; it is available
 * only in the kernel sources ("tools/lib/bpf/bpf_helpers.h") or from libbpf.
 */
|
|
9 #include <bpf/bpf_helpers.h>
|
|
10
|
|
11
|
|
/* define SEC() only if no header included above has already provided it */
#ifndef SEC
#define SEC(NAME)  __attribute__((section(NAME), used))
#endif
|
|
15
|
|
16
|
|
#ifdef LICENSE_GPL

/*
 * debugmsg() output is delivered through the kernel tracing facility.
 * To watch it:
 *
 *   echo 1 > /sys/kernel/debug/tracing/events/bpf_trace/enable
 *   cat /sys/kernel/debug/tracing/trace_pipe
 *   echo 0 > /sys/kernel/debug/tracing/events/bpf_trace/enable
 *
 * The format string is first copied into a local array, and that array
 * (with its size) is what gets handed to bpf_trace_printk().
 */

#define debugmsg(fmt, ...)                                                    \
    do {                                                                      \
        char  ngx_debugmsg_fmt[] = fmt;                                       \
        bpf_trace_printk(ngx_debugmsg_fmt, sizeof(ngx_debugmsg_fmt),          \
                         ##__VA_ARGS__);                                      \
    } while (0)

#else

/* without LICENSE_GPL the macro expands to nothing; arguments are dropped */
#define debugmsg(fmt, ...)

#endif
|
|
38
|
|
39 char _license[] SEC("license") = LICENSE;
|
|
40
|
|
41 /*****************************************************************************/
|
|
42
|
|
/* header form bit, tested against the first byte of the QUIC packet */
#define NGX_QUIC_PKT_LONG        0x80

/* DCID length used for packets that do not have the long header bit set */
#define NGX_QUIC_SERVER_CID_LEN  20
|
|
45
|
|
46
|
|
/*
 * Consumes nbytes of the packet.
 *
 * Relies on the following names being in scope at the point of use:
 *   start, end - bounds of the packet data;
 *   offset     - number of bytes consumed so far (updated);
 *   data       - set to the LAST byte consumed, i.e. start + offset - 1;
 *   failed     - label that is jumped to when the packet is too short.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement (safe inside an unbraced if/else), and the argument is
 * parenthesized so that an expression can be passed.  Use it with a
 * trailing semicolon.
 */
#define advance_data(nbytes)                                                  \
    do {                                                                      \
        offset += (nbytes);                                                   \
        if (start + offset > end) {                                           \
            debugmsg("cannot read %ld bytes at offset %ld", (nbytes),         \
                     offset);                                                 \
            goto failed;                                                      \
        }                                                                     \
        data = start + offset - 1;                                            \
    } while (0)
|
|
54
|
|
55
|
|
/*
 * Assembles a 64-bit value from the 8 bytes at p, most significant byte
 * first.
 *
 * Every byte is widened to __u64 before it is shifted.  Without the cast
 * a byte is promoted to int, so (p)[4] << 24 turns negative for bytes
 * >= 0x80 and is then sign-extended when OR-ed into the 64-bit result,
 * which sets the upper 32 bits to all ones.
 *
 * p is evaluated several times and must not have side effects.
 */
#define ngx_quic_parse_uint64(p)                                              \
    (((__u64) (p)[0] << 56) |                                                 \
     ((__u64) (p)[1] << 48) |                                                 \
     ((__u64) (p)[2] << 40) |                                                 \
     ((__u64) (p)[3] << 32) |                                                 \
     ((__u64) (p)[4] << 24) |                                                 \
     ((__u64) (p)[5] << 16) |                                                 \
     ((__u64) (p)[6] << 8)  |                                                 \
     ((__u64) (p)[7]))
|
|
65
|
|
66 /*
|
|
67 * actual map object is created by the "bpf" system call,
|
|
68 * all pointers to this variable are replaced by the bpf loader
|
|
69 */
|
|
70 struct bpf_map_def SEC("maps") ngx_quic_sockmap;
|
|
71
|
|
72
|
|
73 SEC(PROGNAME)
|
|
74 int ngx_quic_select_socket_by_dcid(struct sk_reuseport_md *ctx)
|
|
75 {
|
|
76 int rc;
|
|
77 __u64 key;
|
|
78 size_t len, offset;
|
|
79 unsigned char *start, *end, *data, *dcid;
|
|
80
|
|
81 start = ctx->data;
|
|
82 end = (unsigned char *) ctx->data_end;
|
|
83 offset = 0;
|
|
84
|
|
85 advance_data(sizeof(struct udphdr)); /* skip UDP header */
|
|
86 advance_data(1); /* QUIC flags */
|
|
87
|
|
88 if (data[0] & NGX_QUIC_PKT_LONG) {
|
|
89
|
|
90 advance_data(4); /* skip QUIC version */
|
|
91 len = data[0]; /* read DCID length */
|
|
92
|
|
93 if (len < 8) {
|
|
94 /* it's useless to search for key in such short DCID */
|
|
95 return SK_PASS;
|
|
96 }
|
|
97
|
|
98 advance_data(1); /* skip DCID len */
|
|
99
|
|
100 } else {
|
|
101 len = NGX_QUIC_SERVER_CID_LEN;
|
|
102 }
|
|
103
|
|
104 dcid = &data[1];
|
|
105 advance_data(len); /* we expect the packet to have full DCID */
|
|
106
|
|
107 /* make verifier happy */
|
|
108 if (dcid + sizeof(__u64) > end) {
|
|
109 goto failed;
|
|
110 }
|
|
111
|
|
112 key = ngx_quic_parse_uint64(dcid);
|
|
113
|
|
114 rc = bpf_sk_select_reuseport(ctx, &ngx_quic_sockmap, &key, 0);
|
|
115
|
|
116 switch (rc) {
|
|
117 case 0:
|
|
118 debugmsg("nginx quic socket selected by key 0x%x", key);
|
|
119 return SK_PASS;
|
|
120
|
|
121 /* kernel returns positive error numbers, errno.h defines positive */
|
|
122 case -ENOENT:
|
|
123 debugmsg("nginx quic default route for key 0x%x", key);
|
|
124 /* let the default reuseport logic decide which socket to choose */
|
|
125 return SK_PASS;
|
|
126
|
|
127 default:
|
|
128 debugmsg("nginx quic bpf_sk_select_reuseport err: %d key 0x%x",
|
|
129 rc, key);
|
|
130 goto failed;
|
|
131 }
|
|
132
|
|
133 failed:
|
|
134 /*
|
|
135 * SK_DROP will generate ICMP, but we may want to process "invalid" packet
|
|
136 * in userspace quic to investigate further and finally react properly
|
|
137 * (maybe ignore, maybe send something in response or close connection)
|
|
138 */
|
|
139 return SK_PASS;
|
|
140 }
|