Skip to content
This repository has been archived by the owner on Jun 6, 2023. It is now read-only.

batch verify seals syscall implemented incorrectly #1472

Open
hunjixin opened this issue Aug 19, 2021 · 11 comments
Open

batch verify seals syscall implemented incorrectly #1472

hunjixin opened this issue Aug 19, 2021 · 11 comments

Comments

@hunjixin
Copy link

hunjixin commented Aug 19, 2021

This error occurs only very occasionally; does anyone have an idea? I have confirmed that the BatchVerifySeals argument and return value have the same length.

@ZenGround0
Copy link
Contributor

@hunjixin please add information about this error

@hunjixin
Copy link
Author

@ZenGround0 there is no more info, just a state root mismatch and an error message: {"errorMessage": "batch verify seals syscall implemented incorrectly", "exitCode": "17", "sender": "f03", "receiver": "f04", "methodNum": "5", "Value": "0", "gasLimit": 100000000000000}

@hunjixin
Copy link
Author

hunjixin commented Aug 20, 2021

In the code, I have checked BatchVerifySeals: this function's argument and return value have the same length. In the code below, if miners contains an element, then verifies must contain it too.

// processBatchProofVerifies drains the queued seal proofs in
// st.ProofValidationBatch, verifies them with a single rt.BatchVerifySeals
// call, and sends ConfirmSectorProofsValid to every miner that has at least
// one successfully verified sector.
//
// Miners without an entry in st.Claims are skipped with a warning. The call
// aborts if state cannot be loaded or iterated, if BatchVerifySeals returns an
// error, or if its result lacks an entry for any miner that was submitted.
func (a Actor) processBatchProofVerifies(rt Runtime) {
	var st State

	// miners records the addresses in the order mmap.ForAll visits them; the
	// confirmation loop below walks this slice, so messages are sent in that
	// order. verifies holds the same addresses as map keys.
	var miners []addr.Address
	verifies := make(map[addr.Address][]proof.SealVerifyInfo)

	rt.StateTransaction(&st, func() {
		store := adt.AsStore(rt)
		if st.ProofValidationBatch == nil {
			// Nothing queued: miners and verifies stay empty. This return only
			// leaves the transaction closure; BatchVerifySeals is still called
			// below with an empty map.
			return
		}
		mmap, err := adt.AsMultimap(store, *st.ProofValidationBatch, builtin.DefaultHamtBitwidth, ProofValidationBatchAmtBitwidth)
		builtin.RequireNoErr(rt, err, exitcode.ErrIllegalState, "failed to load proofs validation batch")

		claims, err := adt.AsMap(adt.AsStore(rt), st.Claims, builtin.DefaultHamtBitwidth)
		builtin.RequireNoErr(rt, err, exitcode.ErrIllegalState, "failed to load claims")

		err = mmap.ForAll(func(k string, arr *adt.Array) error {
			// NOTE: within this closure `a` is the miner address and shadows
			// the method receiver `a Actor`.
			a, err := addr.NewFromBytes([]byte(k))
			builtin.RequireNoErr(rt, err, exitcode.ErrIllegalState, "failed to parse address key")

			// refuse to process proofs for miner with no claim
			found, err := claims.Has(abi.AddrKey(a))
			builtin.RequireNoErr(rt, err, exitcode.ErrIllegalState, "failed to look up claim")
			if !found {
				rt.Log(rtt.WARN, "skipping batch verifies for unknown miner %s", a)
				return nil
			}

			miners = append(miners, a)

			// svi is a single scratch value reused for every array element; it
			// is appended by value on each callback.
			// NOTE(review): presumably ForEach decodes each element into svi
			// before invoking the callback - confirm against adt.Array.
			var infos []proof.SealVerifyInfo
			var svi proof.SealVerifyInfo
			err = arr.ForEach(&svi, func(i int64) error {
				infos = append(infos, svi)
				return nil
			})
			builtin.RequireNoErr(rt, err, exitcode.ErrIllegalState, "failed to iterate over proof verify array for miner %s", a)

			verifies[a] = infos
			return nil
		})
		builtin.RequireNoErr(rt, err, exitcode.ErrIllegalState, "failed to iterate proof batch")

		// Clear the batch so the same proofs are not processed again.
		st.ProofValidationBatch = nil
	})

	res, err := rt.BatchVerifySeals(verifies)
	builtin.RequireNoErr(rt, err, exitcode.ErrIllegalState, "failed to batch verify")

	for _, m := range miners {
		// Every address in miners was also stored as a key of verifies, so the
		// syscall result is expected to contain it; a missing entry aborts.
		vres, ok := res[m]
		if !ok {
			rt.Abortf(exitcode.ErrNotFound, "batch verify seals syscall implemented incorrectly")
		}

		verifs := verifies[m]

		// Collect the sector numbers whose proof verified, keeping only the
		// first occurrence of each sector number.
		seen := map[abi.SectorNumber]struct{}{}
		var successful []abi.SectorNumber
		for i, r := range vres {
			if r {
				// NOTE(review): indexes verifs by the position in vres, so this
				// assumes len(vres) <= len(verifs); no length check is made here.
				snum := verifs[i].SectorID.Number

				if _, exists := seen[snum]; exists {
					// filter-out duplicates
					continue
				}

				seen[snum] = struct{}{}
				successful = append(successful, snum)
			}
		}

		// Miners with no successfully verified sector receive no message.
		if len(successful) > 0 {
			// The exit code is explicitly ignored
			_ = rt.Send(
				m,
				builtin.MethodsMiner.ConfirmSectorProofsValid,
				&builtin.ConfirmSectorProofsParams{Sectors: successful},
				abi.NewTokenAmount(0),
				&builtin.Discard{},
			)
		}
	}
}

@hunjixin
Copy link
Author

@ZenGround0 any idea?

@ZenGround0
Copy link
Contributor

@hunjixin it looks like your syscall implementation is not returning a map with all miners as keys. This is required even if the miner failed all sectors.

@hunjixin
Copy link
Author

hunjixin commented Aug 27, 2021

@ZenGround0 I have added logging in BatchVerifySeals, but it just compares the lengths of the arguments and the returns; they have the same length. The code is nearly the same as lotus. There is very little chance of hitting this problem; there has been an error in the last two months.

// BatchVerifySeals verifies every seal in vis and reports the outcome per miner.
//
// The returned map has one entry for every key in vis (including miners whose
// seal slice is empty), and each value has the same length as the
// corresponding input slice: element i is true when sys.VerifySeal returned
// nil for seal i and false otherwise. Verification failures are logged as
// warnings rather than returned; the error result is always nil.
//
// At most BatchSealVerifyParallelism verifications are in flight at once.
// NOTE(review): assumes BatchSealVerifyParallelism > 0; with a zero-capacity
// semaphore no verification can ever acquire a slot.
func (sys syscalls) BatchVerifySeals(vis map[address.Address][]proof5.SealVerifyInfo) (map[address.Address][]bool, error) {
	out := make(map[address.Address][]bool, len(vis))

	sema := make(chan struct{}, BatchSealVerifyParallelism)
	vmlog.Info("BatchVerifySeals miners:", len(vis))
	var wg sync.WaitGroup
	for addr, seals := range vis {
		// Register the result slice before any goroutine starts so the miner
		// is always present in out, even when it has no seals.
		results := make([]bool, len(seals))
		out[addr] = results

		for i, s := range seals {
			// Acquire the slot before spawning: this bounds the number of live
			// goroutines by the semaphore capacity instead of creating one
			// parked goroutine per seal.
			sema <- struct{}{}
			wg.Add(1)
			go func(ma address.Address, ix int, svi proof5.SealVerifyInfo, res []bool) {
				// Deferred calls run last-in-first-out: the slot is released
				// first, then the wait group is signalled.
				defer wg.Done()
				defer func() { <-sema }()

				if err := sys.VerifySeal(svi); err != nil {
					vmlog.Warnw("seal verify in batch failed", "miner", ma, "index", ix, "err", err)
					// res[ix] keeps its zero value, false.
					return
				}
				// Each goroutine writes only its own index, so no lock is needed.
				res[ix] = true
			}(addr, i, s, results)
		}
	}
	wg.Wait()
	vmlog.Info("BatchVerifySeals Result miners:", len(out))
	return out, nil
}

@ZenGround0
Copy link
Contributor

@hunjixin to confirm it is the venus node that sees the exitcode 17 error which leads to the state root mismatch because it is not seen on mainnet?

@ZenGround0
Copy link
Contributor

If this is the case you should rerun this at the problem epochs and inspect the data making it to res causing the error.

@hunjixin
Copy link
Author

hunjixin commented Aug 28, 2021

error

Yes, the root does not match. But when I set-head back to the previous tipset and reprocess the same tipset, the result always becomes OK, without restarting the process.

@hunjixin
Copy link
Author

hunjixin commented Oct 18, 2021

@ZenGround0

i add log like this

	// Debug logging: snapshot the keys of verifies so they can be compared
	// with the miners slice and with the keys returned by the syscall.
	// Go map iteration order is unspecified, so the logged key order differs
	// between the three lists even when the key sets are identical.
	keyInVerifies := []addr.Address{}
	for key, _ := range verifies {
		keyInVerifies = append(keyInVerifies, key)
	}
	rt.Log(rtt.INFO, "ID: %s, verifies keys before BatchVerifySeals %v", id, keyInVerifies)
	rt.Log(rtt.INFO, "ID: %s, miners keys before BatchVerifySeals %v", id, miners)

	res, err := rt.BatchVerifySeals(verifies)
	builtin.RequireNoErr(rt, err, exitcode.ErrIllegalState, "failed to batch verify")

	// Snapshot the keys present in the syscall result.
	keyInRes := []addr.Address{}
	for key, _ := range res {
		keyInRes = append(keyInRes, key)
	}

	// NOTE: the message text says "before", but this line runs after
	// BatchVerifySeals has returned and logs the keys of its result.
	rt.Log(rtt.INFO, "ID: %s, return before BatchVerifySeals %v", id, keyInRes)

and got log like this

2021-10-16T18:48:03.634+0800	INFO	vm.actors	vmcontext/runtime_adapter.go:197	ID: 21c48411-0860-4061-82ae-7358eff93ae7, verifies keys before BatchVerifySeals [f01170291 f01317157 f01181168 f01207023 f01169696 f0392813 f0411877 f0226418 f01149485 f01218989 f01227383 f01154295 f065877 f01236627 f01103850 f0156452 f01024569 f01138709 f0107999 f01116666 f0150748 f0160735 f083419 f01271225 f087888 f01288529 f01250983 f01138139 f01247078 f01189202 f0469055 f0127378 f01145144 f01101315 f0151498 f01277031 f054420 f01090983 f01177077 f01319368 f01270285 f0428177 f01312143 f01182223 f01250837 f01043193 f01348517 f01098119 f01125168 f01365744 f01251528 f0442377 f0454186 f0124554 f01123833 f0135066 f01261075 f01031867 f01071719 f0156417 f01353593 f062982 f01263957 f0881687 f01122841 f01272340 f01038625 f02419 f01191029 f01096056 f01193462]
2021-10-16T18:48:03.634+0800	INFO	vm.actors	vmcontext/runtime_adapter.go:197	ID: 21c48411-0860-4061-82ae-7358eff93ae7, miners keys before BatchVerifySeals   [f054420 f01125168 f0442377 f01236627 f0454186 f01317157 f0881687 f01181168 f01365744 f0124554 f01090983 f01247078 f01191029 f087888 f01103850 f01071719 f01177077 f01154295 f01182223 f01288529 f0156452 f0411877 f0226418 f01170291 f01207023 f01122841 f01024569 f01250837 f0150748 f01169696 f01096056 f01043193 f01149485 f01218989 f01348517 f01319368 f01138709 f01123833 f01270285 f0135066 f01251528 f01272340 f01189202 f0160735 f0428177 f0156417 f01038625 f0469055 f01250983 f01353593 f01098119 f0107999 f083419 f062982 f01271225 f01193462 f02419 f0127378 f01145144 f01116666 f01312143 f01227383 f01101315 f0151498 f01277031 f01261075 f01263957 f0392813 f01031867 f065877 f01138139]
2021-10-16T18:48:03.634+0800	INFO	vm.context	vmcontext/syscalls.go:91	BatchVerifySeals miners:71
2021-10-16T18:48:04.029+0800	INFO	vm.context	vmcontext/syscalls.go:115	BatchVerifySeals Result miners:71
2021-10-16T18:48:04.029+0800	INFO	vm.actors	vmcontext/runtime_adapter.go:197	ID: 21c48411-0860-4061-82ae-7358eff93ae7, return before BatchVerifySeals        [f0127378 f01031867 f01181168 f0392813 f0156452 f087888 f01247078 f01189202 f0124554 f01250983 f01090983 f01182223 f0881687 f0150748 f01122841 f01138709 f054420 f01193462 f01170291 f02419 f01024569 f01277031 f01125168 f01263957 f0428177 f01365744 f01123833 f01236627 f01101315 f01319368 f01270285 f01038625 f01096056 f01317157 f0411877 f0226418 f083419 f01145144 f01261075 f01169696 f01218989 f065877 f01103850 f01177077 f01098119 f0135066 f01312143 f01043193 f01272340 f01149485 f0107999 f01138139 f0469055 f0151498 f01116666 f01071719 f0156417 f01353593 f01154295 f01271225 f01348517 f01207023 f01250837 f01191029 f0160735 f01251528 f0454186 f01288529 f01227383 f0442377 f062982]
2021-10-16T18:48:04.029+0800	WARN	vm.context	vmcontext/invocation_context.go:197	Abort during actor execution.	{"errorMessage": "batch verify seals syscall implemented incorrectly", "exitCode": "17", "sender": "f03", "receiver": "f04", "methodNum": "5", "Value": "0", "gasLimit": 100000000000000}
2021-10-16T18:48:04.092+0800	INFO	vm.context	vmcontext/vmcontext.go:333	process cron: 463

Before the loop, the miner keys are the same as the res keys.

@hunjixin
Copy link
Author

hunjixin commented Nov 5, 2021

@ZenGround0 in the v6 version, I got an error like this

2021-11-04T19:29:35.417+0800  ERROR  vm.actors  vmcontext/runtime_adapter.go:188  unexpected error processing batch proof verifies: batch verify seals syscall implemented incorrectly, result not found for miner: %!s(PANIC=String method: unknown address protocol). Skipping all verification for epoch 1257299

unknown address protocol ?

Sign up for free to subscribe to this conversation on GitHub. Already have an account? Sign in.
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants