Skip to content

Commit

Permalink
map: zero-allocation operations for common types
Browse files Browse the repository at this point in the history
Map keys and values are currently marshaled into []byte by souped-up
versions of binary.Write and binary.Read. This allows users to
be blissfully unaware of compiler-inserted padding on the Go side.
It is wasteful, however, when the Go in-memory representation already
matches what the kernel expects, because the copy requires additional
allocations.

Refactor syscall marshaling into a new package sysenc which
encapsulates the logic we need to determine whether a Go type
is safe for zero-allocation / zero-copy marshaling. The type
must be a pointer to or a slice of:

* A primitive type like uint32, ... or
* An array of valid types or
* A struct made up of valid types without any compiler
  inserted padding between fields

Per-CPU maps don't support zero-allocation operations for now,
but the new code already makes things a little bit cheaper.

Structs with trailing padding also don't benefit from the
optimization for now. Consider

    type padded struct { A uint32; B uint16 }

Allowing such a type creates an edge case: make([]padded, 1)
uses zero-allocation marshaling while make([]padded, 2)
doesn't, due to interior padding. It's simpler to skip such
types for now.

    goos: linux
    goarch: amd64
    pkg: github.com/cilium/ebpf
    cpu: 12th Gen Intel(R) Core(TM) i7-1260P
                                         │ unsafe.txt  │
                                         │   sec/op    │
    Marshaling/ValueUnmarshalReflect-16    356.1n ± 2%
    Marshaling/KeyMarshalReflect-16        368.6n ± 1%
    Marshaling/ValueBinaryUnmarshaler-16   378.6n ± 2%
    Marshaling/KeyBinaryMarshaler-16       356.2n ± 1%
    Marshaling/KeyValueUnsafe-16           328.0n ± 2%
    PerCPUMarshalling/reflection-16        1.232µ ± 1%

                                         │  unsafe.txt  │
                                         │     B/op     │
    Marshaling/ValueUnmarshalReflect-16    0.000 ± 0%
    Marshaling/KeyMarshalReflect-16        0.000 ± 0%
    Marshaling/ValueBinaryUnmarshaler-16   24.00 ± 0%
    Marshaling/KeyBinaryMarshaler-16       8.000 ± 0%
    Marshaling/KeyValueUnsafe-16           0.000 ± 0%
    PerCPUMarshalling/reflection-16        280.0 ± 0%

                                         │  unsafe.txt  │
                                         │  allocs/op   │
    Marshaling/ValueUnmarshalReflect-16    0.000 ± 0%
    Marshaling/KeyMarshalReflect-16        0.000 ± 0%
    Marshaling/ValueBinaryUnmarshaler-16   1.000 ± 0%
    Marshaling/KeyBinaryMarshaler-16       1.000 ± 0%
    Marshaling/KeyValueUnsafe-16           0.000 ± 0%
    PerCPUMarshalling/reflection-16        3.000 ± 0%

Signed-off-by: Lorenz Bauer <lmb@isovalent.com>
  • Loading branch information
lmb committed Jun 21, 2023
1 parent f4b0232 commit be6a923
Show file tree
Hide file tree
Showing 14 changed files with 911 additions and 197 deletions.
5 changes: 3 additions & 2 deletions collection.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"github.com/cilium/ebpf/btf"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/kconfig"
"github.com/cilium/ebpf/internal/sysenc"
)

// CollectionOptions control loading a collection into the kernel.
Expand Down Expand Up @@ -175,12 +176,12 @@ func (cs *CollectionSpec) RewriteConstants(consts map[string]interface{}) error
return fmt.Errorf("section %s: offset %d(+%d) for variable %s is out of bounds", name, v.Offset, v.Size, vname)
}

b, err := marshalBytes(replacement, int(v.Size))
b, err := sysenc.Marshal(replacement, int(v.Size))
if err != nil {
return fmt.Errorf("marshaling constant replacement %s: %w", vname, err)
}

copy(cpy[v.Offset:v.Offset+v.Size], b)
b.Copy(cpy[v.Offset : v.Offset+v.Size])

replaced[vname] = true
}
Expand Down
2 changes: 1 addition & 1 deletion internal/endian_be.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import "encoding/binary"

// NativeEndian is set to either binary.BigEndian or binary.LittleEndian,
// depending on the host's endianness.
var NativeEndian binary.ByteOrder = binary.BigEndian
var NativeEndian = binary.BigEndian

// ClangEndian is set to either "el" or "eb" depending on the host's endianness.
const ClangEndian = "eb"
2 changes: 1 addition & 1 deletion internal/endian_le.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import "encoding/binary"

// NativeEndian is set to either binary.BigEndian or binary.LittleEndian,
// depending on the host's endianness.
var NativeEndian binary.ByteOrder = binary.LittleEndian
var NativeEndian = binary.LittleEndian

// ClangEndian is set to either "el" or "eb" depending on the host's endianness.
const ClangEndian = "el"
79 changes: 79 additions & 0 deletions internal/sysenc/buffer.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
package sysenc

import (
"unsafe"

"github.com/cilium/ebpf/internal/sys"
)

// Buffer is a pointer to a region of memory, optionally paired with its
// size, which is exchanged with a syscall.
//
// The zero value has a nil pointer and a size of zero.
type Buffer struct {
// Start of the memory region.
ptr unsafe.Pointer
// Size of the buffer. -1 if created from UnsafeBuffer or when using
// zero-copy unmarshaling.
size int
}

// syscallPointerOnly is the sentinel size of a Buffer which only carries a
// pointer. unsafeBytes returns nil for such a Buffer, so Copy copies nothing,
// and Unmarshal returns nil without touching its argument.
const syscallPointerOnly = -1

// newBuffer wraps buf in a Buffer that remembers the length of buf.
//
// An empty or nil buf yields the zero Buffer, since there is no first element
// to take the address of.
func newBuffer(buf []byte) Buffer {
	n := len(buf)
	if n > 0 {
		return Buffer{ptr: unsafe.Pointer(&buf[0]), size: n}
	}
	return Buffer{}
}

// UnsafeBuffer wraps ptr in a Buffer for zero-copy unmarshaling.
//
// The resulting Buffer carries no size. [Buffer.Pointer] is the only method
// that does useful work on it: [Buffer.Copy] copies nothing and
// [Buffer.Unmarshal] returns nil without doing anything.
// Use [SyscallOutput] instead if possible.
func UnsafeBuffer(ptr unsafe.Pointer) Buffer {
	return Buffer{ptr: ptr, size: syscallPointerOnly}
}

// SyscallOutput returns a Buffer of size bytes for a syscall to write into.
//
// If the backing memory of dst is exactly size bytes long the Buffer points
// straight at dst and a later [Buffer.Unmarshal] does nothing. Otherwise a
// fresh byte slice of the requested size is allocated.
//
// The contents of the buffer are undefined and may be non-zero.
func SyscallOutput(dst any, size int) Buffer {
	backing := unsafeBackingMemory(dst)
	if len(backing) != size {
		// dst can't be written to directly, fall back to a scratch buffer.
		return newBuffer(make([]byte, size))
	}

	out := newBuffer(backing)
	// Mark the buffer as pointer-only so that Unmarshal skips the copy.
	out.size = syscallPointerOnly
	return out
}

// Copy writes the contents of the buffer to dst.
//
// Returns the number of bytes copied, which is zero for a Buffer that only
// carries a pointer.
func (b Buffer) Copy(dst []byte) int {
	src := b.unsafeBytes()
	return copy(dst, src)
}

// Pointer returns the location where a syscall should write.
//
// NB: The pointer is handed out regardless of the size of the buffer. This is
// deliberate and supports zero-copy marshaling / unmarshaling using
// unsafe.Pointer.
func (b Buffer) Pointer() sys.Pointer {
	loc := sys.NewPointer(b.ptr)
	return loc
}

// Unmarshal decodes the contents of the buffer into data.
//
// This does nothing and returns nil if the Buffer only carries a pointer,
// that is if it was created by [UnsafeBuffer] or if [SyscallOutput] pointed
// it directly at the destination. All other buffers, including the zero
// Buffer, are passed on to the package level [Unmarshal].
func (b Buffer) Unmarshal(data any) error {
	if b.size != syscallPointerOnly {
		return Unmarshal(data, b.unsafeBytes())
	}

	// The syscall already wrote into the destination.
	return nil
}

// unsafeBytes exposes the memory of the buffer as a byte slice.
//
// Returns nil if the Buffer only carries a pointer, since its size is
// unknown in that case.
func (b Buffer) unsafeBytes() []byte {
	switch b.size {
	case syscallPointerOnly:
		return nil
	default:
		return unsafe.Slice((*byte)(b.ptr), b.size)
	}
}
27 changes: 27 additions & 0 deletions internal/sysenc/buffer_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package sysenc_test

import (
"testing"
"unsafe"

"github.com/cilium/ebpf/internal/sys"
"github.com/cilium/ebpf/internal/sysenc"
qt "github.com/frankban/quicktest"
)

// TestZeroBuffer pins down the behaviour of the zero value of Buffer.
func TestZeroBuffer(t *testing.T) {
	var zero sysenc.Buffer

	// There is nothing to copy out of a zero buffer.
	copied := zero.Copy(make([]byte, 1))
	qt.Assert(t, copied, qt.Equals, 0)

	// The syscall pointer of a zero buffer is the zero sys.Pointer.
	ptr := zero.Pointer()
	qt.Assert(t, ptr, qt.Equals, sys.Pointer{})

	// Unmarshaling from a zero buffer is an error.
	err := zero.Unmarshal(new(uint16))
	qt.Assert(t, err, qt.IsNotNil)
}

// TestUnsafeBuffer checks that a Buffer made by UnsafeBuffer only exposes
// its pointer.
func TestUnsafeBuffer(t *testing.T) {
	ptr := unsafe.Pointer(new(uint16))
	buf := sysenc.UnsafeBuffer(ptr)

	// The size is unknown, so nothing can be copied.
	copied := buf.Copy(make([]byte, 1))
	qt.Assert(t, copied, qt.Equals, 0)

	// The pointer is passed through unchanged.
	got := buf.Pointer()
	qt.Assert(t, got, qt.Equals, sys.NewPointer(ptr))

	// Unmarshaling is a no-op which always succeeds.
	err := buf.Unmarshal(new(uint16))
	qt.Assert(t, err, qt.IsNil)
}
3 changes: 3 additions & 0 deletions internal/sysenc/doc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Package sysenc provides efficient conversion of Go values to system
// call interfaces.
package sysenc
149 changes: 149 additions & 0 deletions internal/sysenc/layout.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found at https://go.dev/LICENSE.

package sysenc

import (
"reflect"
"sync"
)

// dataLayout describes an abstract array of the form:
//
//	[count]struct{
//		contents [size]byte
//		padding  [padding]byte
//	}
//
// The special value invalidLayout marks values which can't be described
// this way.
type dataLayout struct {
// Number of elements in the abstract array.
count int
// Number of content bytes in each element.
size int
// Number of padding bytes which follow the contents of each element.
padding int
}

// invalidLayout is the sentinel produced for values whose layout is not
// supported, for example nil values, unsupported kinds or types with
// interior padding.
var invalidLayout = dataLayout{-1, -1, -1}

// valid reports whether dl is anything other than the invalidLayout sentinel.
func (dl *dataLayout) valid() bool {
	current := *dl
	return current != invalidLayout
}

// length returns the number of bytes covered by the layout, counting the
// padding of every element. An invalid layout has a length of zero.
func (dl *dataLayout) length() int {
	if dl.valid() {
		stride := dl.size + dl.padding
		return dl.count * stride
	}

	return 0
}

// normalise collapses a layout with several elements into a single element.
//
//	[count]struct{ [size]byte }
//
// becomes:
//
//	[0/1]struct{ [size*count]byte }
//
// Layouts which are invalid or have at most one element are left alone.
// The layout becomes invalid if the elements carry padding, since merging
// them would turn that padding into interior padding.
func (dl *dataLayout) normalise() {
	switch {
	case !dl.valid(), dl.count <= 1:
		// Nothing to collapse.

	case dl.padding != 0:
		*dl = invalidLayout

	default:
		merged := dl.size * dl.count
		*dl = dataLayout{count: 1, size: merged, padding: 0}
	}
}

// cachedLayouts memoises the result of layoutOfRecurse by type. layoutOf only
// stores the layouts of struct types in it.
var cachedLayouts sync.Map // map[reflect.Type]dataLayout

// layoutOf determines the memory layout of data.
//
// data may be a value, a pointer to a value, a slice or a pointer to a slice.
// For slices the returned layout describes the element type, with count
// scaled by the length of the slice.
//
// Returns invalidLayout if data is nil or a nil pointer, if its type is
// rejected by layoutOfRecurse, or if the slice element layout can't be
// normalised.
func layoutOf(data any) dataLayout {
	if data == nil {
		return invalidLayout
	}

	typ := reflect.TypeOf(data)
	if typ.Kind() == reflect.Pointer {
		typ = typ.Elem()
	}

	isSlice := false
	if typ.Kind() == reflect.Slice {
		// Slices are only allowed as the root type and are therefore not
		// allowed in layoutOfRecurse.
		typ = typ.Elem()
		isSlice = true
	}

	var layout dataLayout
	if cachedLayout, ok := cachedLayouts.Load(typ); ok {
		layout = cachedLayout.(dataLayout)
	} else {
		layout = layoutOfRecurse(typ)
		// Only struct layouts are cached.
		if typ.Kind() == reflect.Struct {
			cachedLayouts.Store(typ, layout)
		}
	}

	v := reflect.Indirect(reflect.ValueOf(data))
	if !v.IsValid() {
		// Nil pointer.
		return invalidLayout
	}

	if isSlice {
		layout.normalise()
		if layout.valid() {
			// normalise leaves count at zero or one. Multiply instead of
			// assigning so that a slice of zero-sized elements, such as
			// [][0]uint32, keeps a count and therefore a length of zero
			// instead of claiming memory the slice doesn't have.
			layout.count *= v.Len()
		}
	}

	return layout
}

// layoutOfRecurse computes the layout of the type t.
//
// Supported are bool, fixed size integers, floats and complex numbers, as
// well as arrays and structs made up of supported types. Everything else,
// including slices, pointers and the platform sized int, uint and uintptr,
// yields invalidLayout.
//
// Structs are only valid if there is no padding before or in between fields
// and if all fields are either exported or named "_". Padding after the last
// field is allowed and reported via the padding of the returned layout.
func layoutOfRecurse(t reflect.Type) dataLayout {
	switch t.Kind() {
	case reflect.Array:
		layout := layoutOfRecurse(t.Elem())
		layout.normalise()
		if layout.valid() {
			// normalise leaves count at zero or one. Multiply instead of
			// assigning so that arrays of zero-sized elements, such as
			// [2][0]uint32, keep a count and therefore a length of zero.
			layout.count *= t.Len()
		}
		return layout

	case reflect.Struct:
		sum := 0
		offset := uintptr(0)
		for i, n := 0, t.NumField(); i < n; i++ {
			field := t.Field(i)
			if !field.IsExported() && field.Name != "_" {
				return invalidLayout
			}
			layout := layoutOfRecurse(field.Type)
			layout.normalise()
			if !layout.valid() {
				// field.Type is unsupported or contains padding.
				return invalidLayout
			}
			if field.Offset != offset {
				// There is padding before this field.
				return invalidLayout
			}
			// count is zero for zero-length arrays, which don't occupy any
			// memory. Don't account for their element size.
			fieldSize := layout.size * layout.count
			sum += fieldSize
			offset = field.Offset + uintptr(fieldSize)
		}
		// Whatever is left of the struct after the last field is trailing
		// padding.
		return dataLayout{1, sum, int(t.Size()) - sum}

	case reflect.Bool,
		reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64,
		reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64,
		reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
		return dataLayout{1, int(t.Size()), 0}

	default:
		return invalidLayout
	}
}
Loading

0 comments on commit be6a923

Please sign in to comment.