From 2678e1842e442c8d35bffe19f1da329cc204c70e Mon Sep 17 00:00:00 2001 From: Stephen Colebourne Date: Mon, 16 Sep 2024 20:20:20 +0100 Subject: [PATCH] Enhance parsing of TZDB short-forms (#196) * TZDB data can contain a wacky variety of shortened names * Extend the parser to handle this --- src/changes/changes.xml | 5 + .../bp/zone/TzdbZoneRulesCompiler.java | 88 +++++++++++++----- .../bp/zone/TestTzdbZoneRulesCompiler.java | 92 ++++++++----------- 3 files changed, 110 insertions(+), 75 deletions(-) diff --git a/src/changes/changes.xml b/src/changes/changes.xml index e0cec804e..31e360746 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -8,6 +8,11 @@ + + + Enhance TZDB parsing, handling more shortened forms. + + Update to time-zone data 2024agtz. diff --git a/src/main/java/org/threeten/bp/zone/TzdbZoneRulesCompiler.java b/src/main/java/org/threeten/bp/zone/TzdbZoneRulesCompiler.java index df72ddc2c..090df8c8f 100644 --- a/src/main/java/org/threeten/bp/zone/TzdbZoneRulesCompiler.java +++ b/src/main/java/org/threeten/bp/zone/TzdbZoneRulesCompiler.java @@ -48,7 +48,9 @@ import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; +import java.util.Iterator; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.Set; import java.util.SortedMap; @@ -91,6 +93,54 @@ final class TzdbZoneRulesCompiler { .optionalStart().appendLiteral(':').appendValue(SECOND_OF_MINUTE, 2) .toFormatter(); } + private static final Set RULE_LOOKUP = expand("rule", "r"); + private static final Set ZONE_LOOKUP = expand("zone", "z"); + private static final Set LINK_LOOKUP = expand("link", "l"); + private static final Set MIN_YEAR_LOOKUP = expand("minimum", "mi"); + private static final Set MAX_YEAR_LOOKUP = expand("maximum", "ma"); + private static final Set ONLY_YEAR_LOOKUP = expand("only", "o"); + private static final Map MONTH_LOOKUP = new HashMap(); + static { + put(expand("january", "ja"), 1, MONTH_LOOKUP); + 
put(expand("february", "f"), 2, MONTH_LOOKUP); + put(expand("march", "mar"), 3, MONTH_LOOKUP); + put(expand("april", "ap"), 4, MONTH_LOOKUP); + put(expand("may", "may"), 5, MONTH_LOOKUP); + put(expand("june", "jun"), 6, MONTH_LOOKUP); + put(expand("july", "jul"), 7, MONTH_LOOKUP); + put(expand("august", "au"), 8, MONTH_LOOKUP); + put(expand("september", "s"), 9, MONTH_LOOKUP); + put(expand("october", "o"), 10, MONTH_LOOKUP); + put(expand("november", "n"), 11, MONTH_LOOKUP); + put(expand("december", "d"), 12, MONTH_LOOKUP); + } + private static final Map DOW_LOOKUP = new HashMap(); + static { + put(expand("monday", "m"), 1, DOW_LOOKUP); + put(expand("tuesday", "tu"), 2, DOW_LOOKUP); + put(expand("wednesday", "w"), 3, DOW_LOOKUP); + put(expand("thursday", "th"), 4, DOW_LOOKUP); + put(expand("friday", "f"), 5, DOW_LOOKUP); + put(expand("saturday", "sa"), 6, DOW_LOOKUP); + put(expand("sunday", "su"), 7, DOW_LOOKUP); + } + + private static void put(Set strs, int value, Map map) { + for (Iterator it = strs.iterator(); it.hasNext();) { + map.put(it.next(), value); + } + } + + private static Set expand(String whole, String shortest) { + Set set = new HashSet(); + String code = whole; + while (!code.equals(shortest)) { + set.add(code); + code = code.substring(0, code.length() - 1); + } + set.add(code); + return set; + } /** * Reads a set of TZDB files and builds a single combined data file. 
@@ -655,7 +705,7 @@ private void parseFile(File file) throws Exception { } else { if (st.hasMoreTokens()) { String first = st.nextToken(); - if (first.equals("Zone")) { + if (ZONE_LOOKUP.contains(first.toLowerCase(Locale.ENGLISH))) { if (st.countTokens() < 3) { printVerbose("Invalid Zone line in file: " + file + ", line: " + line); throw new IllegalArgumentException("Invalid Zone line"); @@ -667,14 +717,14 @@ private void parseFile(File file) throws Exception { } } else { openZone = null; - if (first.equals("Rule")) { + if (RULE_LOOKUP.contains(first.toLowerCase(Locale.ENGLISH))) { if (st.countTokens() < 9) { printVerbose("Invalid Rule line in file: " + file + ", line: " + line); throw new IllegalArgumentException("Invalid Rule line"); } parseRuleLine(st); - } else if (first.equals("Link")) { + } else if (LINK_LOOKUP.contains(first.toLowerCase(Locale.ENGLISH))) { if (st.countTokens() < 2) { printVerbose("Invalid Link line in file: " + file + ", line: " + line); throw new IllegalArgumentException("Invalid Link line"); @@ -798,39 +848,31 @@ private void parseMonthDayTime(StringTokenizer st, TZDBMonthDayTime mdt) { } private int parseYear(String str, int defaultYear) { - str = str.toLowerCase(); - if (matches(str, "minimum")) { + String lower = str.toLowerCase(Locale.ENGLISH); + if (MIN_YEAR_LOOKUP.contains(lower)) { return Year.MIN_VALUE; - } else if (matches(str, "maximum")) { + } else if (MAX_YEAR_LOOKUP.contains(lower)) { return Year.MAX_VALUE; - } else if (str.equals("only")) { + } else if (ONLY_YEAR_LOOKUP.contains(lower)) { return defaultYear; } return Integer.parseInt(str); } private Month parseMonth(String str) { - str = str.toLowerCase(); - for (Month moy : Month.values()) { - if (matches(str, moy.name().toLowerCase())) { - return moy; - } + Integer value = MONTH_LOOKUP.get(str.toLowerCase(Locale.ENGLISH)); + if (value == null) { + throw new IllegalArgumentException("Unknown month: " + str); } - throw new IllegalArgumentException("Unknown month: " + str); + return Month.of(value); } private DayOfWeek parseDayOfWeek(String str) { - str = 
str.toLowerCase(); - for (DayOfWeek dow : DayOfWeek.values()) { - if (matches(str, dow.name().toLowerCase())) { - return dow; - } + Integer value = DOW_LOOKUP.get(str.toLowerCase(Locale.ENGLISH)); + if (value == null) { + throw new IllegalArgumentException("Unknown day-of-week: " + str); } - throw new IllegalArgumentException("Unknown day-of-week: " + str); - } - - private boolean matches(String str, String search) { - return str.startsWith(search.substring(0, 3)) && search.startsWith(str) && str.length() <= search.length(); + return DayOfWeek.of(value); } private String parseOptional(String str) { diff --git a/src/test/java/org/threeten/bp/zone/TestTzdbZoneRulesCompiler.java b/src/test/java/org/threeten/bp/zone/TestTzdbZoneRulesCompiler.java index b79797d77..f288d2148 100644 --- a/src/test/java/org/threeten/bp/zone/TestTzdbZoneRulesCompiler.java +++ b/src/test/java/org/threeten/bp/zone/TestTzdbZoneRulesCompiler.java @@ -66,40 +66,21 @@ public void test_parseYear_specific() throws Exception { } @Test - public void test_parseYear_min() throws Exception { + public void test_parseYear_minimum() throws Exception { TzdbZoneRulesCompiler test = new TzdbZoneRulesCompiler("2010c", new ArrayList(), null, false); + assertEquals(parseYear(test, "mi", 2000), Year.MIN_VALUE); assertEquals(parseYear(test, "min", 2000), Year.MIN_VALUE); - } - - @Test - public void test_parseYear_mini() throws Exception { - TzdbZoneRulesCompiler test = new TzdbZoneRulesCompiler("2010c", new ArrayList(), null, false); assertEquals(parseYear(test, "mini", 2000), Year.MIN_VALUE); - } - - @Test - public void test_parseYear_minim() throws Exception { - TzdbZoneRulesCompiler test = new TzdbZoneRulesCompiler("2010c", new ArrayList(), null, false); assertEquals(parseYear(test, "minim", 2000), Year.MIN_VALUE); - } - - @Test - public void test_parseYear_minimu() throws Exception { - TzdbZoneRulesCompiler test = new TzdbZoneRulesCompiler("2010c", new ArrayList(), null, false); - assertEquals(parseYear(test, 
"minimu", 2000), Year.MIN_VALUE); - } - - @Test - public void test_parseYear_minimum() throws Exception { - TzdbZoneRulesCompiler test = new TzdbZoneRulesCompiler("2010c", new ArrayList(), null, false); + assertEquals(parseYear(test, "MINIMU", 2000), Year.MIN_VALUE); assertEquals(parseYear(test, "minimum", 2000), Year.MIN_VALUE); } @Test(expectedExceptions=NumberFormatException.class) - public void test_parseYear_minTooShort() throws Exception { + public void test_parseYear_minimumTooShort() throws Exception { TzdbZoneRulesCompiler test = new TzdbZoneRulesCompiler("2010c", new ArrayList(), null, false); - parseYear(test, "mi", 2000); + parseYear(test, "m", 2000); } @Test(expectedExceptions=NumberFormatException.class) @@ -109,39 +90,20 @@ public void test_parseYear_minTooLong() throws Exception { } @Test - public void test_parseYear_max() throws Exception { + public void test_parseYear_maximum() throws Exception { TzdbZoneRulesCompiler test = new TzdbZoneRulesCompiler("2010c", new ArrayList(), null, false); + assertEquals(parseYear(test, "ma", 2000), Year.MAX_VALUE); assertEquals(parseYear(test, "max", 2000), Year.MAX_VALUE); - } - - @Test - public void test_parseYear_maxi() throws Exception { - TzdbZoneRulesCompiler test = new TzdbZoneRulesCompiler("2010c", new ArrayList(), null, false); assertEquals(parseYear(test, "maxi", 2000), Year.MAX_VALUE); - } - - @Test - public void test_parseYear_maxim() throws Exception { - TzdbZoneRulesCompiler test = new TzdbZoneRulesCompiler("2010c", new ArrayList(), null, false); assertEquals(parseYear(test, "maxim", 2000), Year.MAX_VALUE); - } - - @Test - public void test_parseYear_maximu() throws Exception { - TzdbZoneRulesCompiler test = new TzdbZoneRulesCompiler("2010c", new ArrayList(), null, false); - assertEquals(parseYear(test, "maximu", 2000), Year.MAX_VALUE); - } - - @Test - public void test_parseYear_maximum() throws Exception { - TzdbZoneRulesCompiler test = new TzdbZoneRulesCompiler("2010c", new ArrayList(), null, 
false); + assertEquals(parseYear(test, "MAXIMU", 2000), Year.MAX_VALUE); assertEquals(parseYear(test, "maximum", 2000), Year.MAX_VALUE); } @Test(expectedExceptions=NumberFormatException.class) - public void test_parseYear_maxTooShort() throws Exception { + public void test_parseYear_maximumTooShort() throws Exception { TzdbZoneRulesCompiler test = new TzdbZoneRulesCompiler("2010c", new ArrayList(), null, false); - parseYear(test, "ma", 2000); + parseYear(test, "m", 2000); } @Test(expectedExceptions=NumberFormatException.class) @@ -218,13 +180,31 @@ public void test_parseMonth() throws Exception { assertEquals(parseMonth(test, "October"), Month.OCTOBER); assertEquals(parseMonth(test, "November"), Month.NOVEMBER); assertEquals(parseMonth(test, "December"), Month.DECEMBER); + assertEquals(parseMonth(test, "Ja"), Month.JANUARY); assertEquals(parseMonth(test, "Janu"), Month.JANUARY); assertEquals(parseMonth(test, "Janua"), Month.JANUARY); assertEquals(parseMonth(test, "Januar"), Month.JANUARY); + assertEquals(parseMonth(test, "F"), Month.FEBRUARY); + assertEquals(parseMonth(test, "MAR"), Month.MARCH); + assertEquals(parseMonth(test, "AP"), Month.APRIL); + assertEquals(parseMonth(test, "MAY"), Month.MAY); + assertEquals(parseMonth(test, "JUN"), Month.JUNE); + assertEquals(parseMonth(test, "JUL"), Month.JULY); + assertEquals(parseMonth(test, "AU"), Month.AUGUST); + assertEquals(parseMonth(test, "S"), Month.SEPTEMBER); + assertEquals(parseMonth(test, "O"), Month.OCTOBER); + assertEquals(parseMonth(test, "N"), Month.NOVEMBER); + assertEquals(parseMonth(test, "D"), Month.DECEMBER); } - @Test(expectedExceptions=IllegalArgumentException.class) - public void test_parseMonth_invalidMonth() throws Exception { + @Test(expectedExceptions = IllegalArgumentException.class) + public void test_parseMonth_TooShort() throws Exception { + TzdbZoneRulesCompiler test = new TzdbZoneRulesCompiler("2010c", new ArrayList(), null, false); + parseMonth(test, "J"); + } + + @Test(expectedExceptions 
= IllegalArgumentException.class) + public void test_parseMonth_invalid() throws Exception { TzdbZoneRulesCompiler test = new TzdbZoneRulesCompiler("2010c", new ArrayList(), null, false); parseMonth(test, "ABC"); } @@ -269,12 +249,20 @@ public void test_parseDayOfWeek() throws Exception { assertEquals(parseDayOfWeek(test, "Friday"), DayOfWeek.FRIDAY); assertEquals(parseDayOfWeek(test, "Saturday"), DayOfWeek.SATURDAY); assertEquals(parseDayOfWeek(test, "Sunday"), DayOfWeek.SUNDAY); + assertEquals(parseDayOfWeek(test, "M"), DayOfWeek.MONDAY); + assertEquals(parseDayOfWeek(test, "Mo"), DayOfWeek.MONDAY); assertEquals(parseDayOfWeek(test, "Mond"), DayOfWeek.MONDAY); assertEquals(parseDayOfWeek(test, "Monda"), DayOfWeek.MONDAY); } - @Test(expectedExceptions=IllegalArgumentException.class) - public void test_parseDayOfWeek_invalidMonth() throws Exception { + @Test(expectedExceptions = IllegalArgumentException.class) + public void test_parseDayOfWeek_tooShort() throws Exception { + TzdbZoneRulesCompiler test = new TzdbZoneRulesCompiler("2010c", new ArrayList(), null, false); + parseDayOfWeek(test, "T"); + } + + @Test(expectedExceptions = IllegalArgumentException.class) + public void test_parseDayOfWeek_invalid() throws Exception { TzdbZoneRulesCompiler test = new TzdbZoneRulesCompiler("2010c", new ArrayList(), null, false); - parseMonth(test, "ABC"); + parseDayOfWeek(test, "ABC"); }