Skip to content

Commit

Permalink
s2: Load after checking src limit on amd64. (#362)
Browse files: browse the repository at this point in the history
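Fixes a rare crash on amd64: the 8-byte load from src at position s is now performed only after the src limit check has passed.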

Fixes #361
  • Loading branch information
klauspost authored Apr 22, 2021
1 parent 93b05d6 commit a4d53ad
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 16 deletions.
4 changes: 2 additions & 2 deletions s2/_generate/gen.go
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,6 @@ func (o options) genEncodeBlockAsm(name string, tableBits, skipLog, hashBytes, m
})

cv := GP64()
MOVQ(Mem{Base: src, Index: s, Scale: 1}, cv)
nextS := GP32()
// nextS := s + (s-nextEmit)>>6 + 4
{
Expand All @@ -252,6 +251,7 @@ func (o options) genEncodeBlockAsm(name string, tableBits, skipLog, hashBytes, m
CMPL(nextS.As32(), sLimitL)
JGE(LabelRef("emit_remainder_" + name))
}
MOVQ(Mem{Base: src, Index: s, Scale: 1}, cv)
assert(func(ok LabelRef) {
// Check if s is valid (we should have jumped above if not)
tmp := GP64()
Expand Down Expand Up @@ -888,7 +888,6 @@ func (o options) genEncodeBetterBlockAsm(name string, lTableBits, skipLog, lHash
})

cv := GP64()
MOVQ(Mem{Base: src, Index: s, Scale: 1}, cv)
nextS := GP32()
// nextS := s + (s-nextEmit)>>skipLog + 1
{
Expand All @@ -903,6 +902,7 @@ func (o options) genEncodeBetterBlockAsm(name string, lTableBits, skipLog, lHash
CMPL(nextS.As32(), sLimitL)
JGE(LabelRef("emit_remainder_" + name))
}
MOVQ(Mem{Base: src, Index: s, Scale: 1}, cv)
assert(func(ok LabelRef) {
// Check if s is valid (we should have jumped above if not)
tmp := GP64()
Expand Down
28 changes: 14 additions & 14 deletions s2/encodeblock_amd64.s
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,13 @@ zero_loop_encodeBlockAsm:
MOVQ src_base+24(FP), DX

search_loop_encodeBlockAsm:
MOVQ (DX)(CX*1), SI
MOVL CX, BP
SUBL 12(SP), BP
SHRL $0x06, BP
LEAL 4(CX)(BP*1), BP
CMPL BP, 8(SP)
JGE emit_remainder_encodeBlockAsm
MOVQ (DX)(CX*1), SI
MOVL BP, 20(SP)
MOVQ $0x0000cf1bbcdcbf9b, R8
MOVQ SI, R9
Expand Down Expand Up @@ -1252,13 +1252,13 @@ zero_loop_encodeBlockAsm4MB:
MOVQ src_base+24(FP), DX

search_loop_encodeBlockAsm4MB:
MOVQ (DX)(CX*1), SI
MOVL CX, BP
SUBL 12(SP), BP
SHRL $0x06, BP
LEAL 4(CX)(BP*1), BP
CMPL BP, 8(SP)
JGE emit_remainder_encodeBlockAsm4MB
MOVQ (DX)(CX*1), SI
MOVL BP, 20(SP)
MOVQ $0x0000cf1bbcdcbf9b, R8
MOVQ SI, R9
Expand Down Expand Up @@ -2385,13 +2385,13 @@ zero_loop_encodeBlockAsm12B:
MOVQ src_base+24(FP), DX

search_loop_encodeBlockAsm12B:
MOVQ (DX)(CX*1), SI
MOVL CX, BP
SUBL 12(SP), BP
SHRL $0x05, BP
LEAL 4(CX)(BP*1), BP
CMPL BP, 8(SP)
JGE emit_remainder_encodeBlockAsm12B
MOVQ (DX)(CX*1), SI
MOVL BP, 20(SP)
MOVQ $0x000000cf1bbcdcbb, R8
MOVQ SI, R9
Expand Down Expand Up @@ -3289,13 +3289,13 @@ zero_loop_encodeBlockAsm10B:
MOVQ src_base+24(FP), DX

search_loop_encodeBlockAsm10B:
MOVQ (DX)(CX*1), SI
MOVL CX, BP
SUBL 12(SP), BP
SHRL $0x05, BP
LEAL 4(CX)(BP*1), BP
CMPL BP, 8(SP)
JGE emit_remainder_encodeBlockAsm10B
MOVQ (DX)(CX*1), SI
MOVL BP, 20(SP)
MOVQ $0x9e3779b1, R8
MOVQ SI, R9
Expand Down Expand Up @@ -4193,13 +4193,13 @@ zero_loop_encodeBlockAsm8B:
MOVQ src_base+24(FP), DX

search_loop_encodeBlockAsm8B:
MOVQ (DX)(CX*1), SI
MOVL CX, BP
SUBL 12(SP), BP
SHRL $0x04, BP
LEAL 4(CX)(BP*1), BP
CMPL BP, 8(SP)
JGE emit_remainder_encodeBlockAsm8B
MOVQ (DX)(CX*1), SI
MOVL BP, 20(SP)
MOVQ $0x9e3779b1, R8
MOVQ SI, R9
Expand Down Expand Up @@ -5081,13 +5081,13 @@ zero_loop_encodeBetterBlockAsm:
MOVQ src_base+24(FP), DX

search_loop_encodeBetterBlockAsm:
MOVQ (DX)(CX*1), SI
MOVL CX, BP
SUBL 12(SP), BP
SHRL $0x07, BP
LEAL 1(CX)(BP*1), BP
CMPL BP, 8(SP)
JGE emit_remainder_encodeBetterBlockAsm
MOVQ (DX)(CX*1), SI
MOVL BP, 20(SP)
MOVQ $0x00cf1bbcdcbfa563, R8
MOVQ $0x9e3779b1, BP
Expand Down Expand Up @@ -6059,13 +6059,13 @@ zero_loop_encodeBetterBlockAsm4MB:
MOVQ src_base+24(FP), DX

search_loop_encodeBetterBlockAsm4MB:
MOVQ (DX)(CX*1), SI
MOVL CX, BP
SUBL 12(SP), BP
SHRL $0x07, BP
LEAL 1(CX)(BP*1), BP
CMPL BP, 8(SP)
JGE emit_remainder_encodeBetterBlockAsm4MB
MOVQ (DX)(CX*1), SI
MOVL BP, 20(SP)
MOVQ $0x00cf1bbcdcbfa563, R8
MOVQ $0x9e3779b1, BP
Expand Down Expand Up @@ -6980,13 +6980,13 @@ zero_loop_encodeBetterBlockAsm12B:
MOVQ src_base+24(FP), DX

search_loop_encodeBetterBlockAsm12B:
MOVQ (DX)(CX*1), SI
MOVL CX, BP
SUBL 12(SP), BP
SHRL $0x06, BP
LEAL 1(CX)(BP*1), BP
CMPL BP, 8(SP)
JGE emit_remainder_encodeBetterBlockAsm12B
MOVQ (DX)(CX*1), SI
MOVL BP, 20(SP)
MOVQ $0x0000cf1bbcdcbf9b, R8
MOVQ $0x9e3779b1, BP
Expand Down Expand Up @@ -7755,13 +7755,13 @@ zero_loop_encodeBetterBlockAsm10B:
MOVQ src_base+24(FP), DX

search_loop_encodeBetterBlockAsm10B:
MOVQ (DX)(CX*1), SI
MOVL CX, BP
SUBL 12(SP), BP
SHRL $0x05, BP
LEAL 1(CX)(BP*1), BP
CMPL BP, 8(SP)
JGE emit_remainder_encodeBetterBlockAsm10B
MOVQ (DX)(CX*1), SI
MOVL BP, 20(SP)
MOVQ $0x0000cf1bbcdcbf9b, R8
MOVQ $0x9e3779b1, BP
Expand Down Expand Up @@ -8530,13 +8530,13 @@ zero_loop_encodeBetterBlockAsm8B:
MOVQ src_base+24(FP), DX

search_loop_encodeBetterBlockAsm8B:
MOVQ (DX)(CX*1), SI
MOVL CX, BP
SUBL 12(SP), BP
SHRL $0x04, BP
LEAL 1(CX)(BP*1), BP
CMPL BP, 8(SP)
JGE emit_remainder_encodeBetterBlockAsm8B
MOVQ (DX)(CX*1), SI
MOVL BP, 20(SP)
MOVQ $0x0000cf1bbcdcbf9b, R8
MOVQ $0x9e3779b1, BP
Expand Down Expand Up @@ -9295,13 +9295,13 @@ zero_loop_encodeSnappyBlockAsm:
MOVQ src_base+24(FP), DX

search_loop_encodeSnappyBlockAsm:
MOVQ (DX)(CX*1), SI
MOVL CX, BP
SUBL 12(SP), BP
SHRL $0x06, BP
LEAL 4(CX)(BP*1), BP
CMPL BP, 8(SP)
JGE emit_remainder_encodeSnappyBlockAsm
MOVQ (DX)(CX*1), SI
MOVL BP, 20(SP)
MOVQ $0x0000cf1bbcdcbf9b, R8
MOVQ SI, R9
Expand Down Expand Up @@ -10169,13 +10169,13 @@ zero_loop_encodeSnappyBlockAsm12B:
MOVQ src_base+24(FP), DX

search_loop_encodeSnappyBlockAsm12B:
MOVQ (DX)(CX*1), SI
MOVL CX, BP
SUBL 12(SP), BP
SHRL $0x05, BP
LEAL 4(CX)(BP*1), BP
CMPL BP, 8(SP)
JGE emit_remainder_encodeSnappyBlockAsm12B
MOVQ (DX)(CX*1), SI
MOVL BP, 20(SP)
MOVQ $0x000000cf1bbcdcbb, R8
MOVQ SI, R9
Expand Down Expand Up @@ -10938,13 +10938,13 @@ zero_loop_encodeSnappyBlockAsm10B:
MOVQ src_base+24(FP), DX

search_loop_encodeSnappyBlockAsm10B:
MOVQ (DX)(CX*1), SI
MOVL CX, BP
SUBL 12(SP), BP
SHRL $0x05, BP
LEAL 4(CX)(BP*1), BP
CMPL BP, 8(SP)
JGE emit_remainder_encodeSnappyBlockAsm10B
MOVQ (DX)(CX*1), SI
MOVL BP, 20(SP)
MOVQ $0x9e3779b1, R8
MOVQ SI, R9
Expand Down Expand Up @@ -11707,13 +11707,13 @@ zero_loop_encodeSnappyBlockAsm8B:
MOVQ src_base+24(FP), DX

search_loop_encodeSnappyBlockAsm8B:
MOVQ (DX)(CX*1), SI
MOVL CX, BP
SUBL 12(SP), BP
SHRL $0x04, BP
LEAL 4(CX)(BP*1), BP
CMPL BP, 8(SP)
JGE emit_remainder_encodeSnappyBlockAsm8B
MOVQ (DX)(CX*1), SI
MOVL BP, 20(SP)
MOVQ $0x9e3779b1, R8
MOVQ SI, R9
Expand Down

0 comments on commit a4d53ad

Please sign in to comment.