From 4ce6bb37dd11ad69c455214fbd0b28f32f50de44 Mon Sep 17 00:00:00 2001
From: "Stephan T. Lavavej"
Date: Fri, 15 Mar 2024 22:45:55 -0700
Subject: [PATCH] `download_unicode_data_files.py`: Select latest or a specific version (#4469)

---
Note (not part of the commit message):
  * The script now requires exactly one command-line argument: `latest` or a
    version string. The argument selects the base URL under
    https://unicode.org/Public/ that every data file is downloaded from.
  * `Unicode_data_files` changes from a {filename: full URL} dict to a list of
    paths relative to that base URL; the local filename is the last path
    component.
  * An `HTTPError` raised by a download ends the script via `sys.exit` with
    the error text.

 .../download_unicode_data_files.py            | 46 ++++++++++++++-----
 1 file changed, 35 insertions(+), 11 deletions(-)

diff --git a/tools/unicode_properties_parse/download_unicode_data_files.py b/tools/unicode_properties_parse/download_unicode_data_files.py
index bf5c587fd1..7fada90868 100644
--- a/tools/unicode_properties_parse/download_unicode_data_files.py
+++ b/tools/unicode_properties_parse/download_unicode_data_files.py
@@ -1,22 +1,46 @@
 # Copyright (c) Microsoft Corporation.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
+from pathlib import PurePosixPath
+import sys
+from urllib.error import HTTPError
 from urllib.request import urlretrieve
 
 
-Unicode_data_files = {
-    "DerivedCoreProperties.txt": "https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt",
-    "DerivedGeneralCategory.txt": "https://www.unicode.org/Public/UCD/latest/ucd/extracted/DerivedGeneralCategory.txt",
-    "EastAsianWidth.txt": "https://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt",
-    "GraphemeBreakProperty.txt": "https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakProperty.txt",
-    "GraphemeBreakTest.txt": "https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt",
-    "emoji-data.txt": "https://www.unicode.org/Public/UCD/latest/ucd/emoji/emoji-data.txt",
-}
+def get_base_url():
+    if len(sys.argv) != 2:
+        sys.exit(f"Usage: python {sys.argv[0]} [latest|<VERSION>]")
+
+    version = sys.argv[1]
+
+    if version == "latest":
+        return "https://unicode.org/Public/UCD/latest/"
+
+    return f"https://unicode.org/Public/{version}/"
+
+
+Unicode_data_files = [
+    "ucd/DerivedCoreProperties.txt",
+    "ucd/extracted/DerivedGeneralCategory.txt",
+    "ucd/EastAsianWidth.txt",
+    "ucd/auxiliary/GraphemeBreakProperty.txt",
+    "ucd/auxiliary/GraphemeBreakTest.txt",
+    "ucd/emoji/emoji-data.txt",
+]
+
 
 def download_unicode_data_files():
-    for filename, url in Unicode_data_files.items():
-        print(f"downloading {filename} from {url}")
-        urlretrieve(url, filename)
+    base_url = get_base_url()
+    print(f" Base URL: {base_url}")
+
+    for data_file in Unicode_data_files:
+        url = base_url + data_file
+        filename = PurePosixPath(data_file).name
+        print(f"Downloading: {url}")
+        try:
+            urlretrieve(url, filename)
+        except HTTPError as http_error:
+            sys.exit(f"{http_error}")
 
 
 if __name__ == "__main__":