"""Download Unicode Character Database (UCD) data files from unicode.org.

Extracted from the patch "Tools: add a script to download Unicode data files"
(commit 13db7c90d0f8c88bd1e8740aa88239382c114eca), which creates this script
as tools/unicode_properties_parse/download_unicode_data_files.py.
"""

from urllib.request import urlretrieve


# Maps the local file name to save as -> the URL the file is fetched from.
# Each key is the basename of its URL. Every URL points into the "latest" UCD
# directory, so the downloaded content depends on which Unicode version is
# current at the time the script runs.
Unicode_data_files = {
    "DerivedCoreProperties.txt": "https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt",
    "DerivedGeneralCategory.txt": "https://www.unicode.org/Public/UCD/latest/ucd/extracted/DerivedGeneralCategory.txt",
    "EastAsianWidth.txt": "https://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt",
    "GraphemeBreakProperty.txt": "https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakProperty.txt",
    # Was misspelled "GraphemeBreakText.txt", which saved the test data under
    # a name that does not match the file being downloaded.
    "GraphemeBreakTest.txt": "https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt",
    "emoji-data.txt": "https://www.unicode.org/Public/UCD/latest/ucd/emoji/emoji-data.txt",
}


def download_unicode_data_files() -> None:
    """Download every file listed in ``Unicode_data_files``.

    Each file is saved under its key name. The names are relative paths, so
    the files land in the current working directory. A progress line is
    printed before each download.

    Exceptions raised by ``urlretrieve`` are not caught here, so the first
    failed download aborts the run.
    """
    for filename, url in Unicode_data_files.items():
        print(f"downloading {filename} from {url}")
        urlretrieve(url, filename)


if __name__ == "__main__":
    download_unicode_data_files()