Skip to content

Commit

Permalink
java: Improve text trimming performance (#85)
Browse files Browse the repository at this point in the history
Precompiled the Pattern used in the utility methods.

Improves performance. `StringUtils.trim()` is more than 3 times faster.

Fixes: #84
  • Loading branch information
jkronegg committed Jan 3, 2023
1 parent 6603b82 commit 165ab85
Show file tree
Hide file tree
Showing 3 changed files with 76 additions and 5 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ This document is formatted according to the principles of [Keep A CHANGELOG](htt
## [Unreleased]
- [Javascript, PHP, Ruby] Allow messages v19.1.4 to v20.0.1 to be used

### Fixed
- [Java] Improve text trim performance ([#84](https://github.com/cucumber/gherkin/issues/84))

## [26.0.2] - 2022-12-27
### Fixed
- [Go] Fix module names (again)
Expand Down
18 changes: 13 additions & 5 deletions java/src/main/java/io/cucumber/gherkin/StringUtils.java
Original file line number Diff line number Diff line change
@@ -1,28 +1,36 @@
package io.cucumber.gherkin;

import java.util.regex.Pattern;

class StringUtils {

// Patterns are compiled once here instead of on every call of the trim helpers below.
// The shared character class is: space, tab (\t), newline (\n), vertical tab (\x0B),
// form feed (\f), carriage return (\r), next-line (\x85) and non-breaking space (\xA0).
// The *_KEEP_NEW_LINES variants use the same class without \n.
// NOTE(review): per java.util.regex, `$` without MULTILINE can also match just before a
// final line terminator - confirm whether that matters for RTRIM_KEEP_NEW_LINES.

// Leading run (anchored with ^) of the full whitespace class.
private static final Pattern LTRIM = Pattern.compile("^[ \\t\\n\\x0B\\f\\r\\x85\\xA0]+");
// Leading run (anchored with ^) of the whitespace class, excluding \n.
private static final Pattern LTRIM_KEEP_NEW_LINES = Pattern.compile("^[ \\t\\x0B\\f\\r\\x85\\xA0]+");
// Trailing run (anchored with $) of the whitespace class, excluding \n.
private static final Pattern RTRIM_KEEP_NEW_LINES = Pattern.compile("[ \\t\\x0B\\f\\r\\x85\\xA0]+$");
// Trailing run (anchored with $) of the full whitespace class.
private static final Pattern RTRIM = Pattern.compile("[ \\t\\n\\x0B\\f\\r\\x85\\xA0]+$");
// Either alternative: the leading run or the trailing run of the full whitespace class.
private static final Pattern TRIM = Pattern.compile("^[ \\t\\n\\x0B\\f\\r\\x85\\xA0]+|[ \\t\\n\\x0B\\f\\r\\x85\\xA0]+$");

/**
 * Removes the leading run of whitespace from {@code s}. Newlines and the non-breaking
 * space (U+00A0) count as whitespace here.
 *
 * @param s text to trim; it is dereferenced without a null check
 * @return {@code s} with the matched leading run replaced by the empty string
 */
static String ltrim(String s) {
// https://stackoverflow.com/questions/1060570/why-is-non-breaking-space-not-a-whitespace-character-in-java
// NOTE(review): this listing appears to be a rendered diff without +/- markers - the first
// return below looks like the removed per-call regex form, the second its precompiled replacement.
return s.replaceAll("^[ \\t\\n\\x0B\\f\\r\\x85\\xA0]+", "");
return LTRIM.matcher(s).replaceAll("");
}

/**
 * Removes the leading run of whitespace from {@code s}, stopping at the first newline:
 * the character class used here does not contain {@code \n}.
 *
 * @param s text to trim; it is dereferenced without a null check
 * @return {@code s} with the matched leading run replaced by the empty string
 */
static String ltrimKeepNewLines(String s) {
// https://stackoverflow.com/questions/1060570/why-is-non-breaking-space-not-a-whitespace-character-in-java
// NOTE(review): this listing appears to be a rendered diff without +/- markers - the first
// return below looks like the removed per-call regex form, the second its precompiled replacement.
return s.replaceAll("^[ \\t\\x0B\\f\\r\\x85\\xA0]+", "");
return LTRIM_KEEP_NEW_LINES.matcher(s).replaceAll("");
}

/**
 * Removes the trailing run of whitespace from {@code s}; the character class used here
 * does not contain {@code \n}, so newline characters themselves are never removed.
 *
 * @param s text to trim; it is dereferenced without a null check
 * @return {@code s} with the matched trailing run replaced by the empty string
 */
static String rtrimKeepNewLines(String s) {
// https://stackoverflow.com/questions/1060570/why-is-non-breaking-space-not-a-whitespace-character-in-java
// NOTE(review): this listing appears to be a rendered diff without +/- markers - the first
// return below looks like the removed per-call regex form, the second its precompiled replacement.
return s.replaceAll("[ \\t\\x0B\\f\\r\\x85\\xA0]+$", "");
return RTRIM_KEEP_NEW_LINES.matcher(s).replaceAll("");
}

/**
 * Removes the trailing run of whitespace from {@code s}. Newlines and the non-breaking
 * space (U+00A0) count as whitespace here.
 *
 * @param s text to trim; it is dereferenced without a null check
 * @return {@code s} with the matched trailing run replaced by the empty string
 */
static String rtrim(String s) {
// NOTE(review): this listing appears to be a rendered diff without +/- markers - the first
// return below looks like the removed per-call regex form, the second its precompiled replacement.
return s.replaceAll("[ \\t\\n\\x0B\\f\\r\\x85\\xA0]+$", "");
return RTRIM.matcher(s).replaceAll("");
}

/**
 * Removes both the leading and the trailing run of whitespace from {@code s}. Newlines
 * and the non-breaking space (U+00A0) count as whitespace here.
 *
 * @param s text to trim; it is dereferenced without a null check
 * @return {@code s} with the matched leading and trailing runs replaced by the empty string
 */
static String trim(String s) {
// NOTE(review): this listing appears to be a rendered diff without +/- markers - the first
// return below looks like the removed two-pass form (rtrim, then ltrim), the second its
// single-pass replacement using the combined TRIM pattern.
return ltrim(rtrim(s));
return TRIM.matcher(s).replaceAll("");
}

static int symbolCount(String string) {
Expand Down
60 changes: 60 additions & 0 deletions java/src/test/java/io/cucumber/gherkin/StringUtilsTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
package io.cucumber.gherkin;

import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;

import static org.junit.jupiter.api.Assertions.assertEquals;

/**
 * Checks the trimming helpers of {@link StringUtils} against input padded with a
 * non-breaking space (U+00A0), a plain space and a tab, on one line and across lines.
 */
public class StringUtilsTest {
    private static final String WHITESPACE = "\u00A0 \t";
    private static final String CUCUMBER = "🥒";

    // Multi-line padding placed before and after the payload in the *_multiline tests.
    private static final String LEADING_LINES = "\n" + WHITESPACE + "\n" + WHITESPACE;
    private static final String TRAILING_LINES = WHITESPACE + "\n" + WHITESPACE + "\n";

    @Test
    void testRtrim() {
        String input = WHITESPACE + CUCUMBER + WHITESPACE;
        String expected = WHITESPACE + CUCUMBER;

        assertEquals(expected, StringUtils.rtrim(input));
    }

    @Test
    void testRtrim_multiline() {
        String input = LEADING_LINES + CUCUMBER + TRAILING_LINES;
        String expected = LEADING_LINES + CUCUMBER;

        assertEquals(expected, StringUtils.rtrim(input));
    }

    @Test
    void testRtrimKeepNewlines() {
        String input = WHITESPACE + CUCUMBER + "\n" + WHITESPACE;
        String expected = WHITESPACE + CUCUMBER + "\n";

        assertEquals(expected, StringUtils.rtrimKeepNewLines(input));
    }

    @Test
    void testLtrim() {
        String input = WHITESPACE + CUCUMBER + WHITESPACE;
        String expected = CUCUMBER + WHITESPACE;

        assertEquals(expected, StringUtils.ltrim(input));
    }

    @Test
    void testLtrim_multiline() {
        String input = LEADING_LINES + CUCUMBER + TRAILING_LINES;
        String expected = CUCUMBER + TRAILING_LINES;

        assertEquals(expected, StringUtils.ltrim(input));
    }

    @Test
    void testLtrimKeepNewlines() {
        String input = WHITESPACE + "\n" + CUCUMBER + WHITESPACE;
        String expected = "\n" + CUCUMBER + WHITESPACE;

        assertEquals(expected, StringUtils.ltrimKeepNewLines(input));
    }

    @Test
    void testTrim() {
        String input = WHITESPACE + CUCUMBER + WHITESPACE;

        assertEquals(CUCUMBER, StringUtils.trim(input));
    }

    @Test
    void testTrim_multiline() {
        String input = LEADING_LINES + CUCUMBER + TRAILING_LINES;

        assertEquals(CUCUMBER, StringUtils.trim(input));
    }

    @Test
    void testTrim_empty() {
        String input = "";

        assertEquals("", StringUtils.trim(input));
    }
}

0 comments on commit 165ab85

Please sign in to comment.