Skip to content

Commit

Permalink
net: filter: Just In Time compiler for x86-64
Browse files Browse the repository at this point in the history
In order to speedup packet filtering, here is an implementation of a
JIT compiler for x86_64

It is disabled by default, and must be enabled by the admin.

echo 1 >/proc/sys/net/core/bpf_jit_enable

It uses module_alloc() and module_free() to get memory in the 2GB text
kernel range since we call helpers functions from the generated code.

EAX : BPF A accumulator
EBX : BPF X accumulator
RDI : pointer to skb   (first argument given to JIT function)
RBP : frame pointer (even if CONFIG_FRAME_POINTER=n)
r9d : skb->len - skb->data_len (headlen)
r8  : skb->data

To get a trace of generated code, use :

echo 2 >/proc/sys/net/core/bpf_jit_enable

Example of generated code :

# tcpdump -p -n -s 0 -i eth1 host 192.168.20.0/24

flen=18 proglen=147 pass=3 image=ffffffffa00b5000
JIT code: ffffffffa00b5000: 55 48 89 e5 48 83 ec 60 48 89 5d f8 44 8b 4f 60
JIT code: ffffffffa00b5010: 44 2b 4f 64 4c 8b 87 b8 00 00 00 be 0c 00 00 00
JIT code: ffffffffa00b5020: e8 24 7b f7 e0 3d 00 08 00 00 75 28 be 1a 00 00
JIT code: ffffffffa00b5030: 00 e8 fe 7a f7 e0 24 00 3d 00 14 a8 c0 74 49 be
JIT code: ffffffffa00b5040: 1e 00 00 00 e8 eb 7a f7 e0 24 00 3d 00 14 a8 c0
JIT code: ffffffffa00b5050: 74 36 eb 3b 3d 06 08 00 00 74 07 3d 35 80 00 00
JIT code: ffffffffa00b5060: 75 2d be 1c 00 00 00 e8 c8 7a f7 e0 24 00 3d 00
JIT code: ffffffffa00b5070: 14 a8 c0 74 13 be 26 00 00 00 e8 b5 7a f7 e0 24
JIT code: ffffffffa00b5080: 00 3d 00 14 a8 c0 75 07 b8 ff ff 00 00 eb 02 31
JIT code: ffffffffa00b5090: c0 c9 c3

BPF program is 144 bytes long, so native program is almost same size ;)

(000) ldh      [12]
(001) jeq      #0x800           jt 2    jf 8
(002) ld       [26]
(003) and      #0xffffff00
(004) jeq      #0xc0a81400      jt 16   jf 5
(005) ld       [30]
(006) and      #0xffffff00
(007) jeq      #0xc0a81400      jt 16   jf 17
(008) jeq      #0x806           jt 10   jf 9
(009) jeq      #0x8035          jt 10   jf 17
(010) ld       [28]
(011) and      #0xffffff00
(012) jeq      #0xc0a81400      jt 16   jf 13
(013) ld       [38]
(014) and      #0xffffff00
(015) jeq      #0xc0a81400      jt 16   jf 17
(016) ret      #65535
(017) ret      #0

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Ben Hutchings <bhutchings@solarflare.com>
Cc: Hagen Paul Pfeifer <hagen@jauu.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
Eric Dumazet authored and davem330 committed Apr 28, 2011
1 parent b678027 commit 0a14842
Show file tree
Hide file tree
Showing 14 changed files with 918 additions and 62 deletions.
11 changes: 11 additions & 0 deletions Documentation/sysctl/net.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,17 @@ Table : Subdirectories in /proc/sys/net
1. /proc/sys/net/core - Network core options
-------------------------------------------------------

bpf_jit_enable
--------------

This enables Berkeley Packet Filter Just in Time compiler.
Currently supported on x86_64 architecture, bpf_jit provides a framework
to speed packet filtering, the one used by tcpdump/libpcap for example.
Values :
0 - disable the JIT (default value)
1 - enable the JIT
2 - enable the JIT and ask the compiler to emit traces on kernel log.

rmem_default
------------

Expand Down
1 change: 1 addition & 0 deletions MAINTAINERS
Original file line number Diff line number Diff line change
Expand Up @@ -4372,6 +4372,7 @@ S: Maintained
F: net/ipv4/
F: net/ipv6/
F: include/net/ip*
F: arch/x86/net/*

NETWORKING [LABELED] (NetLabel, CIPSO, Labeled IPsec, SECMARK)
M: Paul Moore <paul.moore@hp.com>
Expand Down
1 change: 1 addition & 0 deletions arch/x86/Kbuild
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@ obj-y += vdso/
obj-$(CONFIG_IA32_EMULATION) += ia32/

obj-y += platform/
obj-y += net/
1 change: 1 addition & 0 deletions arch/x86/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ config X86
select IRQ_FORCED_THREADING
select USE_GENERIC_SMP_HELPERS if SMP
select ARCH_NO_SYSDEV_OPS
select HAVE_BPF_JIT if X86_64

config INSTRUCTION_DECODER
def_bool (KPROBES || PERF_EVENTS)
Expand Down
4 changes: 4 additions & 0 deletions arch/x86/net/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#
# Arch-specific network modules
#
obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o
140 changes: 140 additions & 0 deletions arch/x86/net/bpf_jit.S
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
/* bpf_jit.S : BPF JIT helper functions
*
* Copyright (C) 2011 Eric Dumazet (eric.dumazet@gmail.com)
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; version 2
* of the License.
*/
#include <linux/linkage.h>
#include <asm/dwarf2.h>

/*
* Calling convention :
* rdi : skb pointer
* esi : offset of byte(s) to fetch in skb (can be scratched)
* r8 : copy of skb->data
* r9d : hlen = skb->len - skb->data_len
*/
#define SKBDATA %r8

sk_load_word_ind:
.globl sk_load_word_ind

add %ebx,%esi /* offset += X */
# test %esi,%esi /* if (offset < 0) goto bpf_error; */
js bpf_error

sk_load_word:
.globl sk_load_word

mov %r9d,%eax # hlen
sub %esi,%eax # hlen - offset
cmp $3,%eax
jle bpf_slow_path_word
mov (SKBDATA,%rsi),%eax
bswap %eax /* ntohl() */
ret


sk_load_half_ind:
.globl sk_load_half_ind

add %ebx,%esi /* offset += X */
js bpf_error

sk_load_half:
.globl sk_load_half

mov %r9d,%eax
sub %esi,%eax # hlen - offset
cmp $1,%eax
jle bpf_slow_path_half
movzwl (SKBDATA,%rsi),%eax
rol $8,%ax # ntohs()
ret

sk_load_byte_ind:
.globl sk_load_byte_ind
add %ebx,%esi /* offset += X */
js bpf_error

sk_load_byte:
.globl sk_load_byte

cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte */
jle bpf_slow_path_byte
movzbl (SKBDATA,%rsi),%eax
ret

/**
* sk_load_byte_msh - BPF_S_LDX_B_MSH helper
*
* Implements BPF_S_LDX_B_MSH : ldxb 4*([offset]&0xf)
* Must preserve A accumulator (%eax)
* Inputs : %esi is the offset value, already known positive
*/
ENTRY(sk_load_byte_msh)
CFI_STARTPROC
cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte_msh */
jle bpf_slow_path_byte_msh
movzbl (SKBDATA,%rsi),%ebx
and $15,%bl
shl $2,%bl
ret
CFI_ENDPROC
ENDPROC(sk_load_byte_msh)

bpf_error:
# force a return 0 from jit handler
xor %eax,%eax
mov -8(%rbp),%rbx
leaveq
ret

/* rsi contains offset and can be scratched */
#define bpf_slow_path_common(LEN) \
push %rdi; /* save skb */ \
push %r9; \
push SKBDATA; \
/* rsi already has offset */ \
mov $LEN,%ecx; /* len */ \
lea -12(%rbp),%rdx; \
call skb_copy_bits; \
test %eax,%eax; \
pop SKBDATA; \
pop %r9; \
pop %rdi


bpf_slow_path_word:
bpf_slow_path_common(4)
js bpf_error
mov -12(%rbp),%eax
bswap %eax
ret

bpf_slow_path_half:
bpf_slow_path_common(2)
js bpf_error
mov -12(%rbp),%ax
rol $8,%ax
movzwl %ax,%eax
ret

bpf_slow_path_byte:
bpf_slow_path_common(1)
js bpf_error
movzbl -12(%rbp),%eax
ret

bpf_slow_path_byte_msh:
xchg %eax,%ebx /* dont lose A , X is about to be scratched */
bpf_slow_path_common(1)
js bpf_error
movzbl -12(%rbp),%eax
and $15,%al
shl $2,%al
xchg %eax,%ebx
ret
Loading

0 comments on commit 0a14842

Please sign in to comment.