From af07f36391ff7e0d0fa596c2d960b23e0a29c978 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Wed, 27 Apr 2022 20:19:08 -0700 Subject: [PATCH 1/5] size_test: add tests for 0.3 + suffix Such sizes are quite valid but were never tested. Signed-off-by: Kir Kolyshkin --- size_test.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/size_test.go b/size_test.go index ab389ec..9931199 100644 --- a/size_test.go +++ b/size_test.go @@ -98,6 +98,7 @@ func TestFromHumanSize(t *testing.T) { assertSuccessEquals(t, 32.5*KB, FromHumanSize, "32.5kB") assertSuccessEquals(t, 32.5*KB, FromHumanSize, "32.5 kB") assertSuccessEquals(t, 32, FromHumanSize, "32.5 B") + assertSuccessEquals(t, 300, FromHumanSize, "0.3 K") assertError(t, FromHumanSize, "") assertError(t, FromHumanSize, "hello") @@ -128,6 +129,8 @@ func TestRAMInBytes(t *testing.T) { assertSuccessEquals(t, 32, RAMInBytes, "32.3") tmp := 32.3 * MiB assertSuccessEquals(t, int64(tmp), RAMInBytes, "32.3 mb") + tmp = 0.3 * MiB + assertSuccessEquals(t, int64(tmp), RAMInBytes, "0.3MB") assertError(t, RAMInBytes, "") assertError(t, RAMInBytes, "hello") From 1d23ffa157447c3452bb2e31f0bdf9118fafb88a Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Wed, 27 Apr 2022 20:23:23 -0700 Subject: [PATCH 2/5] size_test: add parseSize benchmark MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Shows on my laptop (6 iterations processed by benchstat): name time/op ParseSize-4 10.6µs ± 3% name alloc/op ParseSize-4 3.26kB ± 0% name allocs/op ParseSize-4 72.0 ± 0% Signed-off-by: Kir Kolyshkin --- size_test.go | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/size_test.go b/size_test.go index 9931199..5bdaddd 100644 --- a/size_test.go +++ b/size_test.go @@ -140,6 +140,18 @@ func TestRAMInBytes(t *testing.T) { assertError(t, RAMInBytes, "32bm") } +func BenchmarkParseSize(b *testing.B) { + for i := 0; i < b.N; i++ { + for _, s := range []string{ + "", "32", "32b", "32 B", "32k", "32.5 K", "32kb", "32 Kb", + "32.8Mb", "32.9Gb", "32.777Tb", "32Pb", "0.3Mb", "-1", + } { + FromHumanSize(s) + RAMInBytes(s) + } + } +} + func assertEquals(t *testing.T, expected, actual interface{}) { if expected != actual { t.Errorf("Expected '%v' but got '%v'", expected, actual) From 54c3e559548be5ab7ea742b174496f5cb2555d29 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Thu, 28 Apr 2022 12:15:26 -0700 Subject: [PATCH 3/5] size_test: add t.Helper annotations This way, error messages will show correct line information. Signed-off-by: Kir Kolyshkin --- size_test.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/size_test.go b/size_test.go index 5bdaddd..b18dacb 100644 --- a/size_test.go +++ b/size_test.go @@ -153,6 +153,7 @@ func BenchmarkParseSize(b *testing.B) { } func assertEquals(t *testing.T, expected, actual interface{}) { + t.Helper() if expected != actual { t.Errorf("Expected '%v' but got '%v'", expected, actual) } @@ -168,6 +169,7 @@ func (fn parseFn) String() string { } func assertSuccessEquals(t *testing.T, expected int64, fn parseFn, arg string) { + t.Helper() res, err := fn(arg) if err != nil || res != expected { t.Errorf("%s(\"%s\") -> expected '%d' but got '%d' with error '%v'", fn, arg, expected, res, err) @@ -175,6 +177,7 @@ func assertSuccessEquals(t *testing.T, expected int64, fn parseFn, arg string) { } func assertError(t *testing.T, fn parseFn, arg string) { + t.Helper() res, err := fn(arg) if err == nil && res != -1 { t.Errorf("%s(\"%s\") -> expected error but got '%d'", fn, arg, res) From cec49607f776c9f7f3b300e746ddbd493b2f45ed Mon Sep 17 00:00:00 2001 From: Sebastiaan van Stijn Date: Thu, 28 Apr 2022 12:16:27 -0700 Subject: [PATCH 4/5] size_test: add more tests Co-authored-by: Kir Kolyshkin --- size_test.go | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/size_test.go b/size_test.go index b18dacb..e43f19a 100644 --- a/size_test.go +++ b/size_test.go @@ -83,6 +83,10 @@ func TestHumanSize(t *testing.T) { } func TestFromHumanSize(t *testing.T) { + assertSuccessEquals(t, 0, FromHumanSize, "0") + assertSuccessEquals(t, 0, FromHumanSize, "0b") + assertSuccessEquals(t, 0, FromHumanSize, "0B") + assertSuccessEquals(t, 0, FromHumanSize, "0 B") assertSuccessEquals(t, 32, FromHumanSize, "32") assertSuccessEquals(t, 32, FromHumanSize, "32b") assertSuccessEquals(t, 32, FromHumanSize, "32B") @@ -100,10 +104,56 @@ func TestFromHumanSize(t *testing.T) { assertSuccessEquals(t, 32, FromHumanSize, "32.5 B") assertSuccessEquals(t, 300, FromHumanSize, "0.3 K") + // We do not tolerate extra leading or trailing spaces + // (except for a space after the number and a missing suffix). + assertSuccessEquals(t, 0, FromHumanSize, "0 ") + + assertError(t, FromHumanSize, " 0") + assertError(t, FromHumanSize, " 0b") + assertError(t, FromHumanSize, " 0B") + assertError(t, FromHumanSize, " 0 B") + assertError(t, FromHumanSize, "0b ") + assertError(t, FromHumanSize, "0B ") + assertError(t, FromHumanSize, "0 B ") + assertError(t, FromHumanSize, "") assertError(t, FromHumanSize, "hello") + assertError(t, FromHumanSize, ".") + assertError(t, FromHumanSize, ". ") + assertError(t, FromHumanSize, " ") + assertError(t, FromHumanSize, " ") + assertError(t, FromHumanSize, " .") + assertError(t, FromHumanSize, " . ") + assertError(t, FromHumanSize, "0.") + assertError(t, FromHumanSize, "0. ") + assertError(t, FromHumanSize, "0.b") + assertError(t, FromHumanSize, "0.B") + assertError(t, FromHumanSize, "-0") + assertError(t, FromHumanSize, "-0b") + assertError(t, FromHumanSize, "-0B") + assertError(t, FromHumanSize, "-0 b") + assertError(t, FromHumanSize, "-0 B") assertError(t, FromHumanSize, "-32") assertError(t, FromHumanSize, ".3kB") + assertError(t, FromHumanSize, "-32b") + assertError(t, FromHumanSize, "-32B") + assertError(t, FromHumanSize, "-32 b") + assertError(t, FromHumanSize, "-32 B") + assertError(t, FromHumanSize, "32.") + assertError(t, FromHumanSize, "32.b") + assertError(t, FromHumanSize, "32.B") + assertError(t, FromHumanSize, "32. b") + assertError(t, FromHumanSize, "32. B") + assertError(t, FromHumanSize, "32b.") + assertError(t, FromHumanSize, "32B.") + assertError(t, FromHumanSize, "32 b.") + assertError(t, FromHumanSize, "32 B.") + assertError(t, FromHumanSize, "32 bb") + assertError(t, FromHumanSize, "32 BB") + assertError(t, FromHumanSize, "32 b b") + assertError(t, FromHumanSize, "32 B B") + assertError(t, FromHumanSize, "32 b") + assertError(t, FromHumanSize, "32 B") assertError(t, FromHumanSize, " 32 ") assertError(t, FromHumanSize, "32m b") assertError(t, FromHumanSize, "32bm") From 737572633c434ce2d80ba3fbe2d8ea15d7d821ff Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Wed, 27 Apr 2022 20:30:02 -0700 Subject: [PATCH 5/5] size: stop using regexp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using a regular expression brings in the whole regexp and regexp/syntax packages, which increase the resulting binary size by about 130K (Go 1.18.1, Linux/amd64). Besides, regular expressions are generally slow and incur some initialization overhead (to compile all global regexp.MustComplile variables). This, unlike the size difference, is not the main motivation for this commit, but it feels like it should have also been mentioned. A quick benchmark comparison shows a huge improvement (again, this is not why this is done, nevertheless it pleases the eye): name old time/op new time/op delta ParseSize-4 10.6µs ± 3% 2.6µs ±29% -75.10% (p=0.002 n=6+6) name old alloc/op new alloc/op delta ParseSize-4 3.26kB ± 0% 0.20kB ± 0% -93.75% (p=0.000 n=7+6) name old allocs/op new allocs/op delta ParseSize-4 72.0 ± 0% 26.0 ± 0% -63.89% (p=0.000 n=7+6) Compatibility note: As a result, we are now a but more relaxed to the input, allowing e.g. ".4 Gb", or "-0", or "234. B", following the rules of strconv.ParseFloat. It seems that those were previously rejected as a result of a regex being used, not deliberately. Signed-off-by: Kir Kolyshkin --- size.go | 70 +++++++++++++++++++++++++++++++++++++++++++--------- size_test.go | 31 ++++++++++++----------- 2 files changed, 74 insertions(+), 27 deletions(-) diff --git a/size.go b/size.go index 85f6ab0..c245a89 100644 --- a/size.go +++ b/size.go @@ -2,7 +2,6 @@ package units import ( "fmt" - "regexp" "strconv" "strings" ) @@ -26,16 +25,17 @@ const ( PiB = 1024 * TiB ) -type unitMap map[string]int64 +type unitMap map[byte]int64 var ( - decimalMap = unitMap{"k": KB, "m": MB, "g": GB, "t": TB, "p": PB} - binaryMap = unitMap{"k": KiB, "m": MiB, "g": GiB, "t": TiB, "p": PiB} - sizeRegex = regexp.MustCompile(`^(\d+(\.\d+)*) ?([kKmMgGtTpP])?[iI]?[bB]?$`) + decimalMap = unitMap{'k': KB, 'm': MB, 'g': GB, 't': TB, 'p': PB} + binaryMap = unitMap{'k': KiB, 'm': MiB, 'g': GiB, 't': TiB, 'p': PiB} ) -var decimapAbbrs = []string{"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"} -var binaryAbbrs = []string{"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"} +var ( + decimapAbbrs = []string{"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"} + binaryAbbrs = []string{"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"} +) func getSizeAndUnit(size float64, base float64, _map []string) (float64, string) { i := 0 @@ -89,20 +89,66 @@ func RAMInBytes(size string) (int64, error) { // Parses the human-readable size string into the amount it represents. func parseSize(sizeStr string, uMap unitMap) (int64, error) { - matches := sizeRegex.FindStringSubmatch(sizeStr) - if len(matches) != 4 { + // TODO: rewrite to use strings.Cut if there's a space + // once Go < 1.18 is deprecated. + sep := strings.LastIndexAny(sizeStr, "01234567890. ") + if sep == -1 { + // There should be at least a digit. return -1, fmt.Errorf("invalid size: '%s'", sizeStr) } + var num, sfx string + if sizeStr[sep] != ' ' { + num = sizeStr[:sep+1] + sfx = sizeStr[sep+1:] + } else { + // Omit the space separator. + num = sizeStr[:sep] + sfx = sizeStr[sep+1:] + } - size, err := strconv.ParseFloat(matches[1], 64) + size, err := strconv.ParseFloat(num, 64) if err != nil { return -1, err } + // Backward compatibility: reject negative sizes. + if size < 0 { + return -1, fmt.Errorf("invalid size: '%s'", sizeStr) + } + + if len(sfx) == 0 { + return int64(size), nil + } - unitPrefix := strings.ToLower(matches[3]) - if mul, ok := uMap[unitPrefix]; ok { + // Process the suffix. + + if len(sfx) > 3 { // Too long. + goto badSuffix + } + sfx = strings.ToLower(sfx) + // Trivial case: b suffix. + if sfx[0] == 'b' { + if len(sfx) > 1 { // no extra characters allowed after b. + goto badSuffix + } + return int64(size), nil + } + // A suffix from the map. + if mul, ok := uMap[sfx[0]]; ok { size *= float64(mul) + } else { + goto badSuffix + } + + // The suffix may have extra "b" or "ib" (e.g. KiB or MB). + switch { + case len(sfx) == 2 && sfx[1] != 'b': + goto badSuffix + case len(sfx) == 3 && sfx[1:] != "ib": + goto badSuffix } return int64(size), nil + +badSuffix: + return -1, fmt.Errorf("invalid suffix: '%s'", sfx) } diff --git a/size_test.go b/size_test.go index e43f19a..f9b1d59 100644 --- a/size_test.go +++ b/size_test.go @@ -103,6 +103,22 @@ func TestFromHumanSize(t *testing.T) { assertSuccessEquals(t, 32.5*KB, FromHumanSize, "32.5 kB") assertSuccessEquals(t, 32, FromHumanSize, "32.5 B") assertSuccessEquals(t, 300, FromHumanSize, "0.3 K") + assertSuccessEquals(t, 300, FromHumanSize, ".3kB") + + assertSuccessEquals(t, 0, FromHumanSize, "0.") + assertSuccessEquals(t, 0, FromHumanSize, "0. ") + assertSuccessEquals(t, 0, FromHumanSize, "0.b") + assertSuccessEquals(t, 0, FromHumanSize, "0.B") + assertSuccessEquals(t, 0, FromHumanSize, "-0") + assertSuccessEquals(t, 0, FromHumanSize, "-0b") + assertSuccessEquals(t, 0, FromHumanSize, "-0B") + assertSuccessEquals(t, 0, FromHumanSize, "-0 b") + assertSuccessEquals(t, 0, FromHumanSize, "-0 B") + assertSuccessEquals(t, 32, FromHumanSize, "32.") + assertSuccessEquals(t, 32, FromHumanSize, "32.b") + assertSuccessEquals(t, 32, FromHumanSize, "32.B") + assertSuccessEquals(t, 32, FromHumanSize, "32. b") + assertSuccessEquals(t, 32, FromHumanSize, "32. B") // We do not tolerate extra leading or trailing spaces // (except for a space after the number and a missing suffix). @@ -124,26 +140,11 @@ func TestFromHumanSize(t *testing.T) { assertError(t, FromHumanSize, " ") assertError(t, FromHumanSize, " .") assertError(t, FromHumanSize, " . ") - assertError(t, FromHumanSize, "0.") - assertError(t, FromHumanSize, "0. ") - assertError(t, FromHumanSize, "0.b") - assertError(t, FromHumanSize, "0.B") - assertError(t, FromHumanSize, "-0") - assertError(t, FromHumanSize, "-0b") - assertError(t, FromHumanSize, "-0B") - assertError(t, FromHumanSize, "-0 b") - assertError(t, FromHumanSize, "-0 B") assertError(t, FromHumanSize, "-32") - assertError(t, FromHumanSize, ".3kB") assertError(t, FromHumanSize, "-32b") assertError(t, FromHumanSize, "-32B") assertError(t, FromHumanSize, "-32 b") assertError(t, FromHumanSize, "-32 B") - assertError(t, FromHumanSize, "32.") - assertError(t, FromHumanSize, "32.b") - assertError(t, FromHumanSize, "32.B") - assertError(t, FromHumanSize, "32. b") - assertError(t, FromHumanSize, "32. B") assertError(t, FromHumanSize, "32b.") assertError(t, FromHumanSize, "32B.") assertError(t, FromHumanSize, "32 b.")