-
Notifications
You must be signed in to change notification settings - Fork 12
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
a4df4f1
commit f5e7edf
Showing
9 changed files
with
327 additions
and
76 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
.vscode | ||
.mypy_cache | ||
/opencc_data | ||
/cache | ||
/output |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
# Code points of Han characters to be included: | ||
# 1. Code points in Tongyong Guifan Hanzi Biao (通用规范汉字表) | ||
# 2. Code points in OpenCC dictionaries | ||
|
||
from itertools import chain | ||
|
||
def collect_code_points():
    """Return the set of code points found in the input files.

    Two sources are read, relative to the current working directory:

    * ``cache/通用規範漢字表.txt``: the first character of every line that
      is neither blank nor a ``#`` comment.
    * five OpenCC dictionaries under ``opencc_data/``: every character of
      the key and of each space-separated candidate on each
      ``key<TAB>candidates`` line.

    All files are decoded as UTF-8 explicitly, so the result does not
    depend on the platform's locale encoding.

    Raises:
        ValueError: if an OpenCC dictionary line does not consist of
            exactly two tab-separated fields.
    """
    s = set()

    with open('cache/通用規範漢字表.txt', encoding='utf-8') as f:
        for line in f:
            # Lines from file iteration still carry their newline, so a
            # plain truthiness test would never skip a blank line.
            if line.strip() and not line.startswith('#'):
                s.add(ord(line[0]))

    with open('opencc_data/STCharacters.txt', encoding='utf-8') as f1, \
            open('opencc_data/STPhrases.txt', encoding='utf-8') as f2, \
            open('opencc_data/TWVariants.txt', encoding='utf-8') as f3, \
            open('opencc_data/TWPhrases.txt', encoding='utf-8') as f4, \
            open('opencc_data/HKVariants.txt', encoding='utf-8') as f5:
        for line in chain(f1, f2, f3, f4, f5):
            k, vx = line.rstrip('\n').split('\t')
            s.update(map(ord, k))
            for v in vx.split(' '):
                s.update(map(ord, v))

    return s


def main():
    """Write the collected code points to ``cache/code_points_han.txt``.

    One decimal code point per line, in ascending order.
    """
    # Collect first so that a missing input file does not leave behind a
    # truncated output file.
    code_points = sorted(collect_code_points())

    with open('cache/code_points_han.txt', 'w', encoding='utf-8') as f:
        for cp in code_points:
            if cp > 128:  # remove letters in the dictionaries
                print(cp, file=f)


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
# Build convert tables | ||
# | ||
# Input: | ||
# - build/t2twp.json | ||
# -> opencc_data/TWPhrases.txt | ||
# -> opencc_data/TWVariants.txt | ||
# - opencc_data/STCharacters.txt | ||
# - opencc_data/STPhrases.txt | ||
# - opencc_data/TWVariants.txt | ||
# - opencc_data/TWPhrases.txt | ||
# | ||
# Output: | ||
# - cache/convert_table_words.txt | ||
# - cache/convert_table_chars.txt | ||
# - cache/convert_table_words_twp.txt | ||
# - cache/convert_table_chars_twp.txt | ||
|
||
from opencc import OpenCC | ||
|
||
def _read_first_candidates(path):
    """Yield ``(key, first_candidate)`` for each line of an OpenCC dictionary.

    Each line is expected to be ``key<TAB>cand1 cand2 ...``. The file is
    decoded as UTF-8 explicitly so the result does not depend on the
    platform's locale encoding.
    """
    with open(path, encoding='utf-8') as f:
        for line in f:
            k, vx = line.rstrip('\n').split('\t')
            yield k, vx.split(' ')[0]  # Only select the first candidate


def build_entries(twp=False):
    """Yield ``(source, target)`` conversion pairs from the OpenCC data.

    The s2t dictionaries (``STCharacters``, then ``STPhrases``) are always
    read. With ``twp=True`` their targets are passed through the
    module-level ``t2twp`` converter, and the ``TWVariants`` and
    ``TWPhrases`` dictionaries are appended with their keys passed through
    the module-level ``t2s`` converter.

    The yield order is significant: the caller de-duplicates with
    ``dict()``, so a later pair replaces an earlier one with the same key.

    Args:
        twp: when true, build the s2twp table instead of the s2t table.
            ``t2s`` and ``t2twp`` must then exist as module globals.

    Raises:
        ValueError: if a dictionary line does not consist of exactly two
            tab-separated fields.
    """
    for name in ('STCharacters', 'STPhrases'):  # s2t
        for k, v in _read_first_candidates(f'opencc_data/{name}.txt'):
            yield k, (t2twp(v) if twp else v)  # s2t -> s2twp

    if twp:
        for name in ('TWVariants', 'TWPhrases'):  # t2tw, t2twp
            for k, v in _read_first_candidates(f'opencc_data/{name}.txt'):
                yield t2s(k), v  # t2tw(p) -> s2tw(p)
|
||
def go(twp=False):
    """Write the word and character conversion tables to ``cache/``.

    Pairs from ``build_entries`` are de-duplicated by key (the last pair
    for a key wins), then sorted in descending order of ``(len(key), key)``
    so that longer keys come first. Keys longer than one character go to
    ``convert_table_words*.txt``; all others go to
    ``convert_table_chars*.txt``. Each line is ``key<TAB>value``.

    Output is encoded as UTF-8 explicitly so the tables are identical
    regardless of the platform's locale encoding.

    Args:
        twp: when true, build from the s2twp entries and add a ``_twp``
            suffix to both output file names.
    """
    entries = dict(build_entries(twp=twp))  # remove duplicates
    entries = sorted(entries.items(), key=lambda k_v: (len(k_v[0]), k_v[0]), reverse=True)  # sort

    twp_suffix = '_twp' if twp else ''

    with open(f'cache/convert_table_words{twp_suffix}.txt', 'w', encoding='utf-8') as f1, \
            open(f'cache/convert_table_chars{twp_suffix}.txt', 'w', encoding='utf-8') as f2:
        for k, v in entries:
            print(k, v, sep='\t', file=f1 if len(k) > 1 else f2)
|
||
if __name__ == '__main__':
    # The converters are bound as module globals because build_entries()
    # looks them up by name.
    t2s = OpenCC('t2s').convert
    t2twp = OpenCC('./build/t2twp').convert

    # Build the plain tables first, then the "_twp" tables.
    for use_twp in (False, True):
        go(twp=use_twp)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
/* Test faces for the default build: one @font-face per weight, all sharing
   the "FanWunMing-Test" family name and loaded from ../output/. */
@font-face {
  font-family: "FanWunMing-Test";
  font-weight: 100;
  src: url("../output/FanWunMing-EL.ttf") format("truetype");
}
@font-face {
  font-family: "FanWunMing-Test";
  font-weight: 300;
  src: url("../output/FanWunMing-L.ttf") format("truetype");
}
@font-face {
  font-family: "FanWunMing-Test";
  font-weight: 400;
  src: url("../output/FanWunMing-R.ttf") format("truetype");
}
@font-face {
  font-family: "FanWunMing-Test";
  font-weight: 500;
  src: url("../output/FanWunMing-M.ttf") format("truetype");
}
@font-face {
  font-family: "FanWunMing-Test";
  font-weight: 600;
  src: url("../output/FanWunMing-SB.ttf") format("truetype");
}
@font-face {
  font-family: "FanWunMing-Test";
  font-weight: 700;
  src: url("../output/FanWunMing-B.ttf") format("truetype");
}
@font-face {
  font-family: "FanWunMing-Test";
  font-weight: 900;
  src: url("../output/FanWunMing-H.ttf") format("truetype");
}

/* Test faces for the TW build: same weights, "FanWunMing-Test-TW" family,
   loaded from the FanWunMing-TW-*.ttf files. */
@font-face {
  font-family: "FanWunMing-Test-TW";
  font-weight: 100;
  src: url("../output/FanWunMing-TW-EL.ttf") format("truetype");
}
@font-face {
  font-family: "FanWunMing-Test-TW";
  font-weight: 300;
  src: url("../output/FanWunMing-TW-L.ttf") format("truetype");
}
@font-face {
  font-family: "FanWunMing-Test-TW";
  font-weight: 400;
  src: url("../output/FanWunMing-TW-R.ttf") format("truetype");
}
@font-face {
  font-family: "FanWunMing-Test-TW";
  font-weight: 500;
  src: url("../output/FanWunMing-TW-M.ttf") format("truetype");
}
@font-face {
  font-family: "FanWunMing-Test-TW";
  font-weight: 600;
  src: url("../output/FanWunMing-TW-SB.ttf") format("truetype");
}
@font-face {
  font-family: "FanWunMing-Test-TW";
  font-weight: 700;
  src: url("../output/FanWunMing-TW-B.ttf") format("truetype");
}
@font-face {
  font-family: "FanWunMing-Test-TW";
  font-weight: 900;
  src: url("../output/FanWunMing-TW-H.ttf") format("truetype");
}

/* Family selection by language; the .tw class switches zh-CN content to
   the TW faces. */
:lang(en) { font-family: sans-serif; }
:lang(zh-CN) { font-family: 'FanWunMing-Test', serif; }
:lang(zh-CN).tw { font-family: 'FanWunMing-Test-TW', serif; }

/* Weight helpers, one per weight declared by the @font-face rules above. */
.w100 { font-weight: 100; }
.w300 { font-weight: 300; }
.w400 { font-weight: 400; }
.w500 { font-weight: 500; }
.w600 { font-weight: 600; }
.w700 { font-weight: 700; }
.w900 { font-weight: 900; }
Oops, something went wrong.