diff --git a/features/doc.go b/features/doc.go new file mode 100644 index 000000000..f44510145 --- /dev/null +++ b/features/doc.go @@ -0,0 +1,2 @@ +// Package features allows probing for BPF features available to the calling process. +package features diff --git a/features/map.go b/features/map.go new file mode 100644 index 000000000..f023f56fe --- /dev/null +++ b/features/map.go @@ -0,0 +1,182 @@ +package features + +import ( + "errors" + "fmt" + "os" + "sync" + "unsafe" + + "github.com/cilium/ebpf" + "github.com/cilium/ebpf/internal" + "github.com/cilium/ebpf/internal/unix" +) + +type MapCache struct { + sync.Mutex + mapTypes map[ebpf.MapType]error +} + +var ( + mapCache MapCache +) + +func init() { + mapCache.mapTypes = make(map[ebpf.MapType]error) +} + +func createMapTypeAttr(mt ebpf.MapType) *internal.BPFMapCreateAttr { + var ( + keySize uint32 = 4 + valueSize uint32 = 4 + maxEntries uint32 = 1 + innerMapFd uint32 + flags uint32 + btfKeyTypeID uint32 + btfValueTypeID uint32 + btfFd uint32 + ) + + // switch on map types to generate correct bpfMapCreateAttr + switch mt { + case ebpf.StackTrace: + // valueSize needs to be sizeof(uint64) + valueSize = 8 + case ebpf.LPMTrie: + // keySize and valueSize need to be sizeof(struct{u32 + u8}) + 1 + padding = 8 + // BPF_F_NO_PREALLOC needs to be set + // checked at allocation time for lpm_trie maps + keySize = 8 + valueSize = 8 + flags = unix.BPF_F_NO_PREALLOC + case ebpf.ArrayOfMaps, ebpf.HashOfMaps: + // assign invalid innerMapFd to pass validation check + // will return EBADF + innerMapFd = ^uint32(0) + case ebpf.CGroupStorage, ebpf.PerCPUCGroupStorage: + // keySize needs to be sizeof(struct{u32 + u64}) = 12 (+ padding = 16) + // by using unsafe.Sizeof(int) we are making sure that this works on 32bit and 64bit archs + // checked at allocation time + var align int + keySize = uint32(8 + unsafe.Sizeof(align)) + maxEntries = 0 + case ebpf.Queue, ebpf.Stack: + // keySize needs to be 0, see alloc_check for queue and stack maps + keySize = 0 + case ebpf.RingBuf: + // keySize and valueSize need to be 0 + // maxEntries needs to be power of 2 and PAGE_ALIGNED + // checked at allocation time + keySize = 0 + valueSize = 0 + maxEntries = uint32(os.Getpagesize()) + case ebpf.SkStorage, ebpf.InodeStorage, ebpf.TaskStorage: + // maxEntries needs to be 0 + // BPF_F_NO_PREALLOC needs to be set + // btf* fields need to be set + // see alloc_check for local_storage map types + maxEntries = 0 + flags = unix.BPF_F_NO_PREALLOC + btfKeyTypeID = 1 // BTF_KIND_INT + btfValueTypeID = 3 // BTF_KIND_ARRAY + btfFd = ^uint32(0) + } + + return &internal.BPFMapCreateAttr{ + MapType: uint32(mt), + KeySize: keySize, + ValueSize: valueSize, + MaxEntries: maxEntries, + InnerMapFd: innerMapFd, + Flags: flags, + BTFKeyTypeID: btfKeyTypeID, + BTFValueTypeID: btfValueTypeID, + BTFFd: btfFd, + } + +} + +// HaveMapType probes the running kernel for the availability of the specified map type. +// Return values have the following semantics: +// +// err == nil: The feature is available. +// errors.Is(err, ebpf.ErrNotSupported): The feature is not available. +// err != nil: Any errors encountered during probe execution, wrapped. +// +// Note that the latter case may include false negatives, and that map creation may succeed +// despite an error being returned. Some map types cannot reliably be probed and will also +// return error. Only `nil` and `ebpf.ErrNotSupported` are conclusive. +// +// Probe results are cached and persist throughout any process capability changes. +func HaveMapType(mt ebpf.MapType) error { + if err := validateMaptype(mt); err != nil { + return err + } + + return haveMapType(mt) +} + +func validateMaptype(mt ebpf.MapType) error { + if mt > mt.Max() { + return os.ErrInvalid + } + + if mt == ebpf.StructOpts { + // A probe for a StructOpts map has vmlinux BTF requirements we currently cannot meet + // Once we figure out how to add a working probe in this package, we can remove this check + return errors.New("a probe for MapType StructOpts isn't implemented") + } + + return nil +} + +func haveMapType(mt ebpf.MapType) error { + mapCache.Lock() + defer mapCache.Unlock() + err, ok := mapCache.mapTypes[mt] + if ok { + return err + } + + _, err = internal.BPFMapCreate(createMapTypeAttr(mt)) + + // For nested and storage map types we accept EBADF as indicator that these maps are supported + if errors.Is(err, unix.EBADF) { + if isMapOfMaps(mt) || isStorageMap(mt) { + err = nil + } + } + + // EINVAL occurs when attempting to create a map with an unknown type. + // E2BIG occurs when BPFMapCreateAttr contains non-zero bytes past the end + // of the struct known by the running kernel, meaning the kernel is too old + // to support the given map type. + if errors.Is(err, unix.EINVAL) || errors.Is(err, unix.E2BIG) { + err = ebpf.ErrNotSupported + } + + if err != nil && err != ebpf.ErrNotSupported { + err = fmt.Errorf("unexpected error during feature probe: %w", err) + } + + mapCache.mapTypes[mt] = err + + return err +} + +func isMapOfMaps(mt ebpf.MapType) bool { + switch mt { + case ebpf.ArrayOfMaps, ebpf.HashOfMaps: + return true + } + return false +} + +func isStorageMap(mt ebpf.MapType) bool { + switch mt { + case ebpf.SkStorage, ebpf.InodeStorage, ebpf.TaskStorage: + return true + } + + return false +} diff --git a/features/map_test.go b/features/map_test.go new file mode 100644 index 000000000..71b416bc3 --- /dev/null +++ b/features/map_test.go @@ -0,0 +1,87 @@ +package features + +import ( + "fmt" + "math" + "os" + "testing" + + "github.com/cilium/ebpf" + "github.com/cilium/ebpf/internal/testutils" +) + +var mapTypeMinVersion = map[ebpf.MapType]string{ + ebpf.Hash: "3.19", + ebpf.Array: "3.19", + ebpf.ProgramArray: "4.2", + ebpf.PerfEventArray: "4.3", + ebpf.PerCPUHash: "4.6", + ebpf.PerCPUArray: "4.6", + ebpf.StackTrace: "4.6", + ebpf.CGroupArray: "4.8", + ebpf.LRUHash: "4.10", + ebpf.LRUCPUHash: "4.10", + ebpf.LPMTrie: "4.11", + ebpf.ArrayOfMaps: "4.12", + ebpf.HashOfMaps: "4.12", + ebpf.DevMap: "4.14", + ebpf.SockMap: "4.14", + ebpf.CPUMap: "4.15", + ebpf.XSKMap: "4.18", + ebpf.SockHash: "4.18", + ebpf.CGroupStorage: "4.19", + ebpf.ReusePortSockArray: "4.19", + ebpf.PerCPUCGroupStorage: "4.20", + ebpf.Queue: "4.20", + ebpf.Stack: "4.20", + ebpf.SkStorage: "5.2", + ebpf.DevMapHash: "5.4", + ebpf.StructOpts: "5.6", // requires vmlinux BTF, skip for now + ebpf.RingBuf: "5.8", + ebpf.InodeStorage: "5.10", + ebpf.TaskStorage: "5.11", +} + +func TestHaveMapType(t *testing.T) { + for mt := ebpf.UnspecifiedMap + 1; mt <= mt.Max(); mt++ { + minVersion, ok := mapTypeMinVersion[mt] + if !ok { + // In cases were a new map type wasn't added to testCases + // we should make sure the test runs anyway and fails on old kernels + minVersion = "0.0" + } + + feature := fmt.Sprintf("map type %s", mt.String()) + + t.Run(mt.String(), func(t *testing.T) { + // skip StructOpts test as its not implemented + if mt == ebpf.StructOpts { + t.Skip("Test for map type StructOpts requires working probe") + } + + testutils.SkipOnOldKernel(t, minVersion, feature) + + if err := HaveMapType(mt); err != nil { + t.Fatalf("Map type %s isn't supported even though kernel is at least %s: %v", mt.String(), minVersion, err) + } + }) + + } + +} + +func TestHaveMapTypeUnsupported(t *testing.T) { + if err := haveMapType(ebpf.MapType(math.MaxUint32)); err != ebpf.ErrNotSupported { + t.Fatalf("Expected ebpf.ErrNotSupported but was: %v", err) + } +} + +func TestHaveMapTypeInvalid(t *testing.T) { + if err := HaveMapType(ebpf.MapType(math.MaxUint32)); err != os.ErrInvalid { + t.Fatalf("Expected os.ErrInvalid but was: %v", err) + } + + if err := HaveMapType(ebpf.MapType(ebpf.StructOpts)); err == nil { + t.Fatal("Expected but was nil") + } +} diff --git a/internal/unix/types_linux.go b/internal/unix/types_linux.go index 0a18eaf0c..e502b039d 100644 --- a/internal/unix/types_linux.go +++ b/internal/unix/types_linux.go @@ -20,10 +20,11 @@ const ( EPERM = linux.EPERM ESRCH = linux.ESRCH ENODEV = linux.ENODEV + EBADF = linux.EBADF + E2BIG = linux.E2BIG // ENOTSUPP is not the same as ENOTSUP or EOPNOTSUP ENOTSUPP = syscall.Errno(0x20c) - EBADF = linux.EBADF BPF_F_NO_PREALLOC = linux.BPF_F_NO_PREALLOC BPF_F_NUMA_NODE = linux.BPF_F_NUMA_NODE BPF_F_RDONLY = linux.BPF_F_RDONLY diff --git a/internal/unix/types_other.go b/internal/unix/types_other.go index 1b06defc0..d99b225e9 100644 --- a/internal/unix/types_other.go +++ b/internal/unix/types_other.go @@ -21,6 +21,7 @@ const ( ESRCH = syscall.ESRCH ENODEV = syscall.ENODEV EBADF = syscall.Errno(0) + E2BIG = syscall.Errno(0) // ENOTSUPP is not the same as ENOTSUP or EOPNOTSUP ENOTSUPP = syscall.Errno(0x20c) diff --git a/types.go b/types.go index 441a82fe4..c45dd642e 100644 --- a/types.go +++ b/types.go @@ -10,6 +10,11 @@ import ( // that will be initialized in the kernel. type MapType uint32 +// Max returns the latest supported MapType. +func (_ MapType) Max() MapType { + return maxMapType - 1 +} + // All the various map types that can be created const ( UnspecifiedMap MapType = iota @@ -85,10 +90,17 @@ const ( SkStorage // DevMapHash - Hash-based indexing scheme for references to network devices. DevMapHash + // StructOps - This map holds a kernel struct with its function pointer implemented in a BPF + // program. StructOpts + // RingBuf - Similar to PerfEventArray, but shared across all CPUs. RingBuf + // InodeStorage - Specialized local storage map for inodes. InodeStorage + // TaskStorage - Specialized local storage map for task_struct. TaskStorage + // maxMapType - Bound enum of MapTypes, has to be last in enum. + maxMapType ) // hasPerCPUValue returns true if the Map stores a value per CPU.