Skip to content

Commit

Permalink
download_unicode_data_files.py: Select latest or a specific version
Browse files Browse the repository at this point in the history
  • Loading branch information
StephanTLavavej committed Mar 16, 2024
1 parent ab9a15d commit 4ce6bb3
Showing 1 changed file with 35 additions and 11 deletions.
46 changes: 35 additions & 11 deletions tools/unicode_properties_parse/download_unicode_data_files.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,46 @@
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

from pathlib import PurePosixPath
import sys
from urllib.error import HTTPError
from urllib.request import urlretrieve


# Legacy table: local file name -> absolute URL under the "latest" UCD
# directory. The name Unicode_data_files is reassigned further down to a list
# of paths relative to a base URL.
Unicode_data_files = {
    "DerivedCoreProperties.txt":
        "https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt",
    "DerivedGeneralCategory.txt":
        "https://www.unicode.org/Public/UCD/latest/ucd/extracted/DerivedGeneralCategory.txt",
    "EastAsianWidth.txt":
        "https://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt",
    "GraphemeBreakProperty.txt":
        "https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakProperty.txt",
    "GraphemeBreakTest.txt":
        "https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt",
    "emoji-data.txt":
        "https://www.unicode.org/Public/UCD/latest/ucd/emoji/emoji-data.txt",
}
def get_base_url(argv=None):
    """Return the base URL of the Unicode data directory chosen on the command line.

    Args:
        argv: Argument vector to inspect, laid out like ``sys.argv`` (program
            name first, followed by exactly one version argument). When
            omitted, ``sys.argv`` is used, so existing zero-argument callers
            behave as before.

    Returns:
        ``"https://unicode.org/Public/UCD/latest/"`` when the version argument
        is ``"latest"``; otherwise ``"https://unicode.org/Public/<version>/"``
        with the argument inserted verbatim.

    Raises:
        SystemExit: With a usage message when the vector does not consist of
            the program name plus exactly one argument.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) != 2:
        # Fall back to sys.argv[0] so the usage text can still be built when an
        # explicitly passed vector is empty.
        program = argv[0] if argv else sys.argv[0]
        sys.exit(f"Usage: python {program} [latest|<VERSION LIKE 15.0.0>]")

    version = argv[1]

    if version == "latest":
        return "https://unicode.org/Public/UCD/latest/"

    # The version string is not validated here; a wrong value surfaces as an
    # HTTP error when the first file is requested.
    return f"https://unicode.org/Public/{version}/"


# Data files to fetch, written as paths relative to the base URL returned by
# get_base_url(). The final path component is used as the local file name.
Unicode_data_files = [
    "ucd/DerivedCoreProperties.txt",
    "ucd/extracted/DerivedGeneralCategory.txt",
    "ucd/EastAsianWidth.txt",
    "ucd/auxiliary/GraphemeBreakProperty.txt",
    "ucd/auxiliary/GraphemeBreakTest.txt",
    "ucd/emoji/emoji-data.txt",
]


def download_unicode_data_files():
    """Download every entry of Unicode_data_files from the selected base URL.

    The base URL comes from get_base_url(), which reads the command line. Each
    entry is a path relative to that base; the file is saved under its final
    path component (a relative file name, so it lands in the working
    directory).

    Raises:
        SystemExit: With the HTTP error text as soon as one download fails
            with an HTTPError; files after the failing one are not attempted.
    """
    base_url = get_base_url()
    print(f" Base URL: {base_url}")

    for data_file in Unicode_data_files:
        url = base_url + data_file
        # Entries always use "/" separators, so parse them as POSIX paths
        # regardless of the host platform.
        filename = PurePosixPath(data_file).name
        print(f"Downloading: {url}")
        try:
            urlretrieve(url, filename)
        except HTTPError as http_error:
            sys.exit(f"{http_error}")


if __name__ == "__main__":
Expand Down

0 comments on commit 4ce6bb3

Please sign in to comment.