Skip to content

Commit

Permalink
flate, zstd: Shave some bytes off amd64 matchLen (#963)
Browse files Browse the repository at this point in the history
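XORQ already sets the flags (including ZF), so the separate TESTQ before the JZ is redundant and has been removed.

Use a 32-bit logical shift (SHRL) instead of a 64-bit arithmetic shift (SARQ) after TZCNT/BSF: the bit index of a non-zero 64-bit word is at most 63, so the result is identical and the instruction encoding is shorter.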
  • Loading branch information
greatroar committed May 30, 2024
1 parent 3a0faf3 commit 5f7dd25
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 12 deletions.
10 changes: 4 additions & 6 deletions flate/matchlen_amd64.s
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
#include "textflag.h"

// func matchLen(a []byte, b []byte) int
- // Requires: BMI
TEXT ·matchLen(SB), NOSPLIT, $0-56
MOVQ a_base+0(FP), AX
MOVQ b_base+24(FP), CX
Expand All @@ -17,17 +16,16 @@ TEXT ·matchLen(SB), NOSPLIT, $0-56
JB matchlen_match4_standalone

matchlen_loopback_standalone:
- MOVQ (AX)(SI*1), BX
- XORQ (CX)(SI*1), BX
- TESTQ BX, BX
- JZ matchlen_loop_standalone
+ MOVQ (AX)(SI*1), BX
+ XORQ (CX)(SI*1), BX
+ JZ matchlen_loop_standalone

#ifdef GOAMD64_v3
TZCNTQ BX, BX
#else
BSFQ BX, BX
#endif
- SARQ $0x03, BX
+ SHRL $0x03, BX
LEAL (SI)(BX*1), SI
JMP gen_match_len_end

Expand Down
10 changes: 4 additions & 6 deletions zstd/matchlen_amd64.s
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
#include "textflag.h"

// func matchLen(a []byte, b []byte) int
- // Requires: BMI
TEXT ·matchLen(SB), NOSPLIT, $0-56
MOVQ a_base+0(FP), AX
MOVQ b_base+24(FP), CX
Expand All @@ -17,17 +16,16 @@ TEXT ·matchLen(SB), NOSPLIT, $0-56
JB matchlen_match4_standalone

matchlen_loopback_standalone:
- MOVQ (AX)(SI*1), BX
- XORQ (CX)(SI*1), BX
- TESTQ BX, BX
- JZ matchlen_loop_standalone
+ MOVQ (AX)(SI*1), BX
+ XORQ (CX)(SI*1), BX
+ JZ matchlen_loop_standalone

#ifdef GOAMD64_v3
TZCNTQ BX, BX
#else
BSFQ BX, BX
#endif
- SARQ $0x03, BX
+ SHRL $0x03, BX
LEAL (SI)(BX*1), SI
JMP gen_match_len_end

Expand Down

0 comments on commit 5f7dd25

Please sign in to comment.