errors: remove verifier truncation heuristic, rely on ENOSPC

In order to capture full verifier errors of large programs, the caller can choose to repeatedly retry loading the program with progressively larger log buffers, since the important info is always at the end of the log. In doing so, it's incredibly likely for the last character of the buffer to be a newline, resulting in the truncated flag being set to false when it should instead be set to true, regardless of the kernel's ENOSPC return code. This commit removes the newline heuristic that currently aims to detect log truncation. Instead, we now require ENOSPC to be sent by the kernel in order for the Truncated flag to be set in the VerifierError. The heuristic was initially introduced for parsing BTF verifier errors, since there's a latent kernel bug that causes ENOSPC only being returned on a successful BTF load. Unfortunately, it turns out that not all BTF verifier errors end in a newline either (see calls to btf_verifier_log()), making the newline heuristic ineffective for BTF logs as well. ErrorWithLog now takes an ancillary error from a second load operation that is used to determine truncation status. This second error will not be wrapped into the VerifierError. Signed-off-by: Timo Beckers <timo@isovalent.com>
cilium · Aug 19, 2022 · 0076beb · 0076beb
1 parent 61d01d8
commit 0076beb
Show file tree

Hide file tree

Showing 5 changed files with 56 additions and 50 deletions.
diff --git a/btf/btf.go b/btf/btf.go
@@ -737,9 +737,13 @@ func NewHandle(spec *Spec) (*Handle, error) {
 		attr.BtfLogBuf = sys.NewSlicePointer(logBuf)
 		attr.BtfLogSize = uint32(len(logBuf))
 		attr.BtfLogLevel = 1
-		// NB: The syscall will never return ENOSPC as of 5.18-rc4.
-		_, _ = sys.BtfLoad(attr)
-		return nil, internal.ErrorWithLog(err, logBuf)
+		_, ve := sys.BtfLoad(attr)
+
+		// Up until at least kernel 6.0, the BTF verifier does not return ENOSPC
+		// if there are other verification errors. ENOSPC is only returned when
+		// the BTF blob is correct, a log was requested, and the provided buffer
+		// is too small.
+		return nil, internal.ErrorWithLog(err, ve, logBuf)
 	}
 
 	return &Handle{fd, attr.BtfSize}, nil

diff --git a/internal/errors.go b/internal/errors.go
@@ -2,39 +2,42 @@ package internal
 
 import (
 	"bytes"
+	"errors"
 	"fmt"
 	"io"
 	"strings"
+
+	"github.com/cilium/ebpf/internal/unix"
 )
 
-// ErrorWithLog returns an error which includes logs from the kernel verifier.
+// ErrorWithLog wraps err in a VerifierError that includes the parsed verifier
+// log buffer.
 //
 // The default error output is a summary of the full log. The latter can be
 // accessed via VerifierError.Log or by formatting the error, see Format.
 //
-// A set of heuristics is used to determine whether the log has been truncated.
-func ErrorWithLog(err error, log []byte) *VerifierError {
+// A secondary error (e.g. from a retried load with verifier log enabled)
+// can be provided that is checked for ENOSPC to determine if the kernel
+// truncated the log.
+func ErrorWithLog(err, ve error, log []byte) *VerifierError {
 	const whitespace = "\t\r\v\n "
 
+	// Either error can be ENOSPC depending on whether or now the verifier
+	// was explicitly enabled by the caller.
+	truncated := false
+	if errors.Is(err, unix.ENOSPC) || errors.Is(ve, unix.ENOSPC) {
+		truncated = true
+	}
+
 	// Convert verifier log C string by truncating it on the first 0 byte
 	// and trimming trailing whitespace before interpreting as a Go string.
-	truncated := false
 	if i := bytes.IndexByte(log, 0); i != -1 {
-		if i > 0 && i == len(log)-1 && log[i-1] != '\n' {
-			// The null byte is at the end of the buffer and it's not preceded
-			// by a newline character. Most likely the buffer was too short.
-			truncated = true
-		}
-
 		log = log[:i]
-	} else if len(log) > 0 {
-		// No null byte? Dodgy!
-		truncated = true
 	}
 
 	log = bytes.Trim(log, whitespace)
 	if len(log) == 0 {
-		return &VerifierError{err, nil, false}
+		return &VerifierError{err, nil, truncated}
 	}
 
 	logLines := bytes.Split(log, []byte{'\n'})

diff --git a/internal/errors_test.go b/internal/errors_test.go
@@ -21,62 +21,60 @@ func TestVerifierErrorWhitespace(t *testing.T) {
 		0, 0, // trailing NUL bytes
 	)
 
-	err := ErrorWithLog(errors.New("test"), b)
+	err := ErrorWithLog(errors.New("test"), nil, b)
 	qt.Assert(t, err.Error(), qt.Equals, "test: unreachable insn 28")
 
-	err = ErrorWithLog(errors.New("test"), nil)
+	err = ErrorWithLog(errors.New("test"), nil, nil)
 	qt.Assert(t, err.Error(), qt.Equals, "test")
 
-	err = ErrorWithLog(errors.New("test"), []byte("\x00"))
+	err = ErrorWithLog(errors.New("test"), nil, []byte("\x00"))
 	qt.Assert(t, err.Error(), qt.Equals, "test")
 
-	err = ErrorWithLog(errors.New("test"), []byte(" "))
+	err = ErrorWithLog(errors.New("test"), nil, []byte(" "))
 	qt.Assert(t, err.Error(), qt.Equals, "test")
 }
 
-func TestVerifierError(t *testing.T) {
-	for _, test := range []struct {
-		name string
-		log  string
-	}{
-		{"missing null", "foo"},
-		{"missing newline before null", "foo\x00"},
-	} {
-		t.Run("truncate "+test.name, func(t *testing.T) {
-			ve := ErrorWithLog(syscall.ENOENT, []byte(test.log))
-			qt.Assert(t, ve, qt.IsNotNil, qt.Commentf("should return error"))
-			qt.Assert(t, ve.Truncated, qt.IsTrue, qt.Commentf("should be truncated"))
-		})
-	}
+func TestVerifierErrorWrapping(t *testing.T) {
+	ve := ErrorWithLog(unix.ENOENT, nil, nil)
+	qt.Assert(t, ve, qt.ErrorIs, unix.ENOENT, qt.Commentf("should wrap the primary error"))
+	qt.Assert(t, ve.Truncated, qt.IsFalse, qt.Commentf("no ENOSPC specified, should not be truncated"))
+
+	ve = ErrorWithLog(unix.EINVAL, unix.ENOSPC, nil)
+	qt.Assert(t, ve, qt.ErrorIs, unix.EINVAL, qt.Commentf("should wrap the primary error"))
+	qt.Assert(t, ve, qt.Not(qt.ErrorIs), unix.ENOSPC, qt.Commentf("should not wrap the secondary error"))
+	qt.Assert(t, ve.Truncated, qt.IsTrue, qt.Commentf("kernel signaled log buffer was full, should be truncated"))
 
-	ve := ErrorWithLog(syscall.ENOENT, nil)
-	qt.Assert(t, ve, qt.IsNotNil, qt.Commentf("should return error without log or logErr"))
+	ve = ErrorWithLog(unix.EINVAL, unix.ENOSPC, []byte("foo"))
+	qt.Assert(t, ve, qt.ErrorIs, unix.EINVAL, qt.Commentf("should wrap the primary error"))
+	qt.Assert(t, ve.Error(), qt.Contains, "foo", qt.Commentf("verifier log should appear in error string"))
 
+	ve = ErrorWithLog(unix.ENOSPC, nil, []byte("foo"))
+	qt.Assert(t, ve, qt.ErrorIs, unix.ENOSPC, qt.Commentf("should wrap the primary error"))
+	qt.Assert(t, ve.Error(), qt.Contains, "foo", qt.Commentf("verifier log should appear in error string"))
+	qt.Assert(t, ve.Truncated, qt.IsTrue, qt.Commentf("kernel signaled log buffer was full, should be truncated"))
+}
+
+func TestVerifierErrorSummaries(t *testing.T) {
+	// Suppress the last line containing 'processed ... insns'.
 	errno524 := readErrorFromFile(t, "testdata/errno524.log")
-	t.Log(errno524)
 	qt.Assert(t, errno524.Error(), qt.Contains, "JIT doesn't support bpf-to-bpf calls")
 	qt.Assert(t, errno524.Error(), qt.Not(qt.Contains), "processed 39 insns")
 
+	// Include the previous line if the current one starts with a tab.
 	invalidMember := readErrorFromFile(t, "testdata/invalid-member.log")
-	t.Log(invalidMember)
 	qt.Assert(t, invalidMember.Error(), qt.Contains, "STRUCT task_struct size=7744 vlen=218: cpus_mask type_id=109 bitfield_size=0 bits_offset=7744 Invalid member")
 
+	// Only include the last line.
 	issue43 := readErrorFromFile(t, "testdata/issue-43.log")
-	t.Log(issue43)
 	qt.Assert(t, issue43.Error(), qt.Contains, "[11] FUNC helper_func2 type_id=10 vlen != 0")
 	qt.Assert(t, issue43.Error(), qt.Not(qt.Contains), "[10] FUNC_PROTO (anon) return=3 args=(3 arg)")
 
-	truncated := readErrorFromFile(t, "testdata/truncated.log")
-	t.Log(truncated)
-	qt.Assert(t, truncated.Truncated, qt.IsTrue)
-	qt.Assert(t, truncated.Error(), qt.Contains, "str_off: 3166088: str_len: 228")
-
+	// Include instruction that caused invalid register access.
 	invalidR0 := readErrorFromFile(t, "testdata/invalid-R0.log")
-	t.Log(invalidR0)
 	qt.Assert(t, invalidR0.Error(), qt.Contains, "0: (95) exit: R0 !read_ok")
 
+	// Include symbol that doesn't match context type.
 	invalidCtx := readErrorFromFile(t, "testdata/invalid-ctx-access.log")
-	t.Log(invalidCtx)
 	qt.Assert(t, invalidCtx.Error(), qt.Contains, "func '__x64_sys_recvfrom' arg0 type FWD is not a struct: invalid bpf_context access off=0 size=8")
 }
 
@@ -120,5 +118,5 @@ func readErrorFromFile(tb testing.TB, file string) *VerifierError {
 		tb.Fatal("Read file:", err)
 	}
 
-	return ErrorWithLog(unix.EINVAL, contents)
+	return ErrorWithLog(unix.EINVAL, nil, contents)
 }
diff --git a/internal/testdata/truncated.log b/internal/testdata/truncated.log
diff --git a/prog.go b/prog.go
@@ -331,12 +331,13 @@ func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions, handles *hand
 
 	// If the caller did not specify a log level,
 	// re-run with branch-level verifier logs enabled.
+	var ve error
 	if !opts.LogDisabled && opts.LogLevel == 0 {
 		logBuf = make([]byte, opts.LogSize)
 		attr.LogLevel = LogLevelBranch
 		attr.LogSize = uint32(len(logBuf))
 		attr.LogBuf = sys.NewSlicePointer(logBuf)
-		_, _ = sys.ProgLoad(attr)
+		_, ve = sys.ProgLoad(attr)
 	}
 
 	switch {
@@ -361,7 +362,7 @@ func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions, handles *hand
 		}
 	}
 
-	err = internal.ErrorWithLog(err, logBuf)
+	err = internal.ErrorWithLog(err, ve, logBuf)
 	if btfDisabled {
 		return nil, fmt.Errorf("load program: %w (kernel without BTF support)", err)
 	}