Skip to content

Commit

Permalink
Enhance parsing of TZDB short-forms (#196)
Browse files Browse the repository at this point in the history
* TZDB data can contain a wacky variety of shortened names
* Extend the parser to handle this
  • Loading branch information
jodastephen committed Sep 16, 2024
1 parent adcdbc4 commit 2678e18
Show file tree
Hide file tree
Showing 3 changed files with 110 additions and 75 deletions.
5 changes: 5 additions & 0 deletions src/changes/changes.xml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@
<body>

<!-- types are add, fix, remove, update -->
<release version="1.7.0" date="SNAPSHOT" description="v1.7.0">
<action dev="jodastephen" type="fix" >
Enhance TZDB parsing, handling more shortened forms.
</action>
</release>
<release version="1.6.9" date="2024-03-25" description="v1.6.9">
<action dev="jodastephen" type="update" >
Update to time-zone data 2024agtz.
Expand Down
88 changes: 65 additions & 23 deletions src/main/java/org/threeten/bp/zone/TzdbZoneRulesCompiler.java
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,9 @@
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
Expand Down Expand Up @@ -91,6 +93,54 @@ final class TzdbZoneRulesCompiler {
.optionalStart().appendLiteral(':').appendValue(SECOND_OF_MINUTE, 2)
.toFormatter();
}
// Lookup tables for TZDB keywords and their accepted shortened spellings.
// Each call to expand(whole, shortest) yields every prefix of 'whole' from the
// full word down to 'shortest'; all keys are therefore lower case.
// NOTE(review): parseYear/parseMonth/parseDayOfWeek lower-case their input before
// the lookup, but parseFile appears to test the raw first token against
// ZONE_LOOKUP/RULE_LOOKUP/LINK_LOOKUP - confirm that token is lower-cased,
// otherwise "Zone", "Rule" and "Link" will not match these sets.

// Line-type keywords that start a Rule, Zone or Link line.
private static final Set<String> RULE_LOOKUP = expand("rule", "r");
private static final Set<String> ZONE_LOOKUP = expand("zone", "z");
private static final Set<String> LINK_LOOKUP = expand("link", "l");
// Year keywords; "mi" and "ma" are the shortest forms because a bare "m"
// would not distinguish minimum from maximum.
private static final Set<String> MIN_YEAR_LOOKUP = expand("minimum", "mi");
private static final Set<String> MAX_YEAR_LOOKUP = expand("maximum", "ma");
private static final Set<String> ONLY_YEAR_LOOKUP = expand("only", "o");
// Month name (or prefix) to month-of-year number, 1 (January) to 12 (December).
private static final Map<String, Integer> MONTH_LOOKUP = new HashMap<String, Integer>();
static {
// The shortest form of each month is chosen so that no key is shared between
// two months (e.g. "ja"/"jun"/"jul", "mar"/"may", "ap"/"au").
put(expand("january", "ja"), 1, MONTH_LOOKUP);
put(expand("february", "f"), 2, MONTH_LOOKUP);
put(expand("march", "mar"), 3, MONTH_LOOKUP);
put(expand("april", "ap"), 4, MONTH_LOOKUP);
put(expand("may", "may"), 5, MONTH_LOOKUP);
put(expand("june", "jun"), 6, MONTH_LOOKUP);
put(expand("july", "jul"), 7, MONTH_LOOKUP);
put(expand("august", "au"), 8, MONTH_LOOKUP);
put(expand("september", "s"), 9, MONTH_LOOKUP);
put(expand("october", "o"), 10, MONTH_LOOKUP);
put(expand("november", "n"), 11, MONTH_LOOKUP);
put(expand("december", "d"), 12, MONTH_LOOKUP);
}
// Day name (or prefix) to day-of-week number, 1 (Monday) to 7 (Sunday).
private static final Map<String, Integer> DOW_LOOKUP = new HashMap<String, Integer>();
static {
// "tu"/"th" and "sa"/"su" need two letters to stay distinct; the others need one.
put(expand("monday", "m"), 1, DOW_LOOKUP);
put(expand("tuesday", "tu"), 2, DOW_LOOKUP);
put(expand("wednesday", "w"), 3, DOW_LOOKUP);
put(expand("thursday", "th"), 4, DOW_LOOKUP);
put(expand("friday", "f"), 5, DOW_LOOKUP);
put(expand("saturday", "sa"), 6, DOW_LOOKUP);
put(expand("sunday", "su"), 7, DOW_LOOKUP);
}

/**
 * Registers every string of a set as a key of the map, all bound to the same value.
 *
 * @param strs  the keys to register, not null
 * @param value  the value each key is mapped to
 * @param map  the map to populate; an existing entry for a key is overwritten
 */
private static void put(Set<String> strs, int value, Map<String, Integer> map) {
    final Integer boxed = Integer.valueOf(value);
    for (String key : strs) {
        map.put(key, boxed);
    }
}

/**
 * Builds the set of accepted spellings of a keyword: every prefix of
 * {@code whole}, from the full word down to and including {@code shortest}.
 * <p>
 * For example, {@code expand("rule", "r")} yields {@code rule, rul, ru, r}.
 *
 * @param whole  the full keyword, not null
 * @param shortest  the shortest accepted abbreviation, which must be a prefix of {@code whole}
 * @return a new mutable set holding each accepted spelling, not null
 * @throws IllegalArgumentException if {@code shortest} is not a prefix of {@code whole}
 */
private static Set<String> expand(String whole, String shortest) {
    // Without this check a mistyped abbreviation would trim 'whole' past the
    // empty string and fail with an unhelpful StringIndexOutOfBoundsException.
    if (!whole.startsWith(shortest)) {
        throw new IllegalArgumentException(
                "Shortest form '" + shortest + "' is not a prefix of '" + whole + "'");
    }
    Set<String> set = new HashSet<String>();
    for (int len = whole.length(); len >= shortest.length(); len--) {
        set.add(whole.substring(0, len));
    }
    return set;
}

/**
* Reads a set of TZDB files and builds a single combined data file.
Expand Down Expand Up @@ -655,7 +705,7 @@ private void parseFile(File file) throws Exception {
} else {
if (st.hasMoreTokens()) {
String first = st.nextToken();
if (first.equals("Zone")) {
if (ZONE_LOOKUP.contains(first)) {
if (st.countTokens() < 3) {
printVerbose("Invalid Zone line in file: " + file + ", line: " + line);
throw new IllegalArgumentException("Invalid Zone line");
Expand All @@ -667,14 +717,14 @@ private void parseFile(File file) throws Exception {
}
} else {
openZone = null;
if (first.equals("Rule")) {
if (RULE_LOOKUP.contains(first)) {
if (st.countTokens() < 9) {
printVerbose("Invalid Rule line in file: " + file + ", line: " + line);
throw new IllegalArgumentException("Invalid Rule line");
}
parseRuleLine(st);

} else if (first.equals("Link")) {
} else if (LINK_LOOKUP.contains(first)) {
if (st.countTokens() < 2) {
printVerbose("Invalid Link line in file: " + file + ", line: " + line);
throw new IllegalArgumentException("Invalid Link line");
Expand Down Expand Up @@ -798,39 +848,31 @@ private void parseMonthDayTime(StringTokenizer st, TZDBMonthDayTime mdt) {
}

private int parseYear(String str, int defaultYear) {
str = str.toLowerCase();
if (matches(str, "minimum")) {
String lower = str.toLowerCase(Locale.ENGLISH);
if (MIN_YEAR_LOOKUP.contains(lower)) {
return Year.MIN_VALUE;
} else if (matches(str, "maximum")) {
} else if (MAX_YEAR_LOOKUP.contains(lower)) {
return Year.MAX_VALUE;
} else if (str.equals("only")) {
} else if (ONLY_YEAR_LOOKUP.contains(lower)) {
return defaultYear;
}
return Integer.parseInt(str);
}

private Month parseMonth(String str) {
str = str.toLowerCase();
for (Month moy : Month.values()) {
if (matches(str, moy.name().toLowerCase())) {
return moy;
}
Integer value = MONTH_LOOKUP.get(str.toLowerCase(Locale.ENGLISH));
if (value == null) {
throw new IllegalArgumentException("Unknown month: " + str);
}
throw new IllegalArgumentException("Unknown month: " + str);
return Month.of(value);
}

private DayOfWeek parseDayOfWeek(String str) {
str = str.toLowerCase();
for (DayOfWeek dow : DayOfWeek.values()) {
if (matches(str, dow.name().toLowerCase())) {
return dow;
}
Integer value = DOW_LOOKUP.get(str.toLowerCase(Locale.ENGLISH));
if (value == null) {
throw new IllegalArgumentException("Unknown day-of-week: " + str);
}
throw new IllegalArgumentException("Unknown day-of-week: " + str);
}

private boolean matches(String str, String search) {
return str.startsWith(search.substring(0, 3)) && search.startsWith(str) && str.length() <= search.length();
return DayOfWeek.of(value);
}

private String parseOptional(String str) {
Expand Down
92 changes: 40 additions & 52 deletions src/test/java/org/threeten/bp/zone/TestTzdbZoneRulesCompiler.java
Original file line number Diff line number Diff line change
Expand Up @@ -66,40 +66,21 @@ public void test_parseYear_specific() throws Exception {
}

@Test
public void test_parseYear_min() throws Exception {
public void test_parseYear_minimum() throws Exception {
TzdbZoneRulesCompiler test = new TzdbZoneRulesCompiler("2010c", new ArrayList<File>(), null, false);
assertEquals(parseYear(test, "mi", 2000), Year.MIN_VALUE);
assertEquals(parseYear(test, "min", 2000), Year.MIN_VALUE);
}

@Test
public void test_parseYear_mini() throws Exception {
TzdbZoneRulesCompiler test = new TzdbZoneRulesCompiler("2010c", new ArrayList<File>(), null, false);
assertEquals(parseYear(test, "mini", 2000), Year.MIN_VALUE);
}

@Test
public void test_parseYear_minim() throws Exception {
TzdbZoneRulesCompiler test = new TzdbZoneRulesCompiler("2010c", new ArrayList<File>(), null, false);
assertEquals(parseYear(test, "minim", 2000), Year.MIN_VALUE);
}

@Test
public void test_parseYear_minimu() throws Exception {
TzdbZoneRulesCompiler test = new TzdbZoneRulesCompiler("2010c", new ArrayList<File>(), null, false);
assertEquals(parseYear(test, "minimu", 2000), Year.MIN_VALUE);
}

@Test
public void test_parseYear_minimum() throws Exception {
TzdbZoneRulesCompiler test = new TzdbZoneRulesCompiler("2010c", new ArrayList<File>(), null, false);
assertEquals(parseYear(test, "MINIMU", 2000), Year.MIN_VALUE);
assertEquals(parseYear(test, "minimum", 2000), Year.MIN_VALUE);
}


@Test(expectedExceptions=NumberFormatException.class)
public void test_parseYear_minTooShort() throws Exception {
public void test_parseYear_minimumTooShort() throws Exception {
TzdbZoneRulesCompiler test = new TzdbZoneRulesCompiler("2010c", new ArrayList<File>(), null, false);
parseYear(test, "mi", 2000);
parseYear(test, "m", 2000);
}

@Test(expectedExceptions=NumberFormatException.class)
Expand All @@ -109,39 +90,20 @@ public void test_parseYear_minTooLong() throws Exception {
}

@Test
public void test_parseYear_max() throws Exception {
public void test_parseYear_maximum() throws Exception {
TzdbZoneRulesCompiler test = new TzdbZoneRulesCompiler("2010c", new ArrayList<File>(), null, false);
assertEquals(parseYear(test, "ma", 2000), Year.MAX_VALUE);
assertEquals(parseYear(test, "max", 2000), Year.MAX_VALUE);
}

@Test
public void test_parseYear_maxi() throws Exception {
TzdbZoneRulesCompiler test = new TzdbZoneRulesCompiler("2010c", new ArrayList<File>(), null, false);
assertEquals(parseYear(test, "maxi", 2000), Year.MAX_VALUE);
}

@Test
public void test_parseYear_maxim() throws Exception {
TzdbZoneRulesCompiler test = new TzdbZoneRulesCompiler("2010c", new ArrayList<File>(), null, false);
assertEquals(parseYear(test, "maxim", 2000), Year.MAX_VALUE);
}

@Test
public void test_parseYear_maximu() throws Exception {
TzdbZoneRulesCompiler test = new TzdbZoneRulesCompiler("2010c", new ArrayList<File>(), null, false);
assertEquals(parseYear(test, "maximu", 2000), Year.MAX_VALUE);
}

@Test
public void test_parseYear_maximum() throws Exception {
TzdbZoneRulesCompiler test = new TzdbZoneRulesCompiler("2010c", new ArrayList<File>(), null, false);
assertEquals(parseYear(test, "MAXIMU", 2000), Year.MAX_VALUE);
assertEquals(parseYear(test, "maximum", 2000), Year.MAX_VALUE);
}

@Test(expectedExceptions=NumberFormatException.class)
public void test_parseYear_maxTooShort() throws Exception {
public void test_parseYear_maximumTooShort() throws Exception {
TzdbZoneRulesCompiler test = new TzdbZoneRulesCompiler("2010c", new ArrayList<File>(), null, false);
parseYear(test, "ma", 2000);
parseYear(test, "m", 2000);
}

@Test(expectedExceptions=NumberFormatException.class)
Expand Down Expand Up @@ -218,13 +180,31 @@ public void test_parseMonth() throws Exception {
assertEquals(parseMonth(test, "October"), Month.OCTOBER);
assertEquals(parseMonth(test, "November"), Month.NOVEMBER);
assertEquals(parseMonth(test, "December"), Month.DECEMBER);
assertEquals(parseMonth(test, "Ja"), Month.JANUARY);
assertEquals(parseMonth(test, "Janu"), Month.JANUARY);
assertEquals(parseMonth(test, "Janua"), Month.JANUARY);
assertEquals(parseMonth(test, "Januar"), Month.JANUARY);
assertEquals(parseMonth(test, "F"), Month.FEBRUARY);
assertEquals(parseMonth(test, "MAR"), Month.MARCH);
assertEquals(parseMonth(test, "AP"), Month.APRIL);
assertEquals(parseMonth(test, "MAY"), Month.MAY);
assertEquals(parseMonth(test, "JUN"), Month.JUNE);
assertEquals(parseMonth(test, "JUL"), Month.JULY);
assertEquals(parseMonth(test, "AU"), Month.AUGUST);
assertEquals(parseMonth(test, "S"), Month.SEPTEMBER);
assertEquals(parseMonth(test, "O"), Month.OCTOBER);
assertEquals(parseMonth(test, "N"), Month.NOVEMBER);
assertEquals(parseMonth(test, "D"), Month.DECEMBER);
}

@Test(expectedExceptions=IllegalArgumentException.class)
public void test_parseMonth_invalidMonth() throws Exception {
// A bare "J" must be rejected: the shortest accepted forms registered for the
// J-months are "ja", "jun" and "jul", so "J" alone is not a key of the lookup.
@Test(expectedExceptions = IllegalArgumentException.class)
public void test_parseMonth_TooShort() throws Exception {
TzdbZoneRulesCompiler test = new TzdbZoneRulesCompiler("2010c", new ArrayList<File>(), null, false);
parseMonth(test, "J");
}

// Text that is not a prefix of any month name must be rejected.
@Test(expectedExceptions = IllegalArgumentException.class)
public void test_parseMonth_invalid() throws Exception {
TzdbZoneRulesCompiler test = new TzdbZoneRulesCompiler("2010c", new ArrayList<File>(), null, false);
parseMonth(test, "ABC");
}
Expand Down Expand Up @@ -269,12 +249,20 @@ public void test_parseDayOfWeek() throws Exception {
assertEquals(parseDayOfWeek(test, "Friday"), DayOfWeek.FRIDAY);
assertEquals(parseDayOfWeek(test, "Saturday"), DayOfWeek.SATURDAY);
assertEquals(parseDayOfWeek(test, "Sunday"), DayOfWeek.SUNDAY);
assertEquals(parseDayOfWeek(test, "M"), DayOfWeek.MONDAY);
assertEquals(parseDayOfWeek(test, "Mo"), DayOfWeek.MONDAY);
assertEquals(parseDayOfWeek(test, "Mond"), DayOfWeek.MONDAY);
assertEquals(parseDayOfWeek(test, "Monda"), DayOfWeek.MONDAY);
}

@Test(expectedExceptions=IllegalArgumentException.class)
public void test_parseDayOfWeek_invalidMonth() throws Exception {
// A bare "T" must be rejected by the day-of-week parser: the shortest accepted
// forms are "tu" (Tuesday) and "th" (Thursday), so "T" alone is not a key.
@Test(expectedExceptions = IllegalArgumentException.class)
public void test_parseDayOfWeek_tooShort() throws Exception {
    TzdbZoneRulesCompiler test = new TzdbZoneRulesCompiler("2010c", new ArrayList<File>(), null, false);
    // Exercise parseDayOfWeek; calling parseMonth here would pass without
    // ever testing day-of-week parsing.
    parseDayOfWeek(test, "T");
}

// Text that is not a prefix of any day name must be rejected by the day-of-week parser.
@Test(expectedExceptions = IllegalArgumentException.class)
public void test_parseDayOfWeek_invalid() throws Exception {
    TzdbZoneRulesCompiler test = new TzdbZoneRulesCompiler("2010c", new ArrayList<File>(), null, false);
    // Exercise parseDayOfWeek; calling parseMonth here would pass without
    // ever testing day-of-week parsing.
    parseDayOfWeek(test, "ABC");
}
Expand Down

0 comments on commit 2678e18

Please sign in to comment.