-
Notifications
You must be signed in to change notification settings - Fork 6
/
stringtool.py
executable file
·225 lines (175 loc) · 8 KB
/
stringtool.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""\
Tool for converting to/from string resources. This app has strings that need to
be translated in 2 locations:

  - The Firebase database (mirrored in app/testdata/checklistappdev-export.json)
  - The strings.xml resource files

As a general process for soliciting translations and updating the app, we would
like to be able to copy all of these to a Google sheet, where translators can
input translations, and then back to the source code. This tool handles both
directions with a minimal diff footprint to make reviewing easier.

Examples:

  # Output all of the strings to a CSV for pasting into Google Sheets
  $ stringtool.py tocsv > strings.csv

  # Given a CSV from Google Sheets, update this repository with missing strings
  $ cat strings.csv | stringtool.py tostrings
"""
import argparse, collections, copy, csv, HTMLParser, itertools, json, os, re, sys
import xml.dom.minidom
import xml.etree.ElementTree
# Path (relative to the current working directory) of the JSON database export
# that this tool reads translations from and writes them back to.
JSON_DB_FILENAME = 'app/testdata/checklistappdev-export.json'
# Keys of each checklist item whose text is exported for translation.
TRANSLATABLE_FIELDS = ('name', 'description')
# Language of the source strings: it maps to the plain `values` resource
# directory and to a checklist item's own (non-'alt') fields.
DEFAULT_LANGUAGE = 'en'
# Additional locales: each one gets a CSV column, a `values-<locale>` resource
# directory and an entry under a checklist item's 'alt' mapping.
LOCALES = ('uk', 'ru', 'ar', 'es')
# Matches file names that are exactly `strings.xml`.
STRINGS_FILE_RE = re.compile(r'^strings\.xml$')
def encode_tuple(pair, encoding='utf-8'):
    """Encode both items of a ``(key, value)`` pair.

    Args:
        pair: Two-item sequence ``(key, value)``. ``key`` must provide an
            ``encode`` method; ``value`` may be ``None``.
        encoding: Codec name handed to ``encode``.

    Returns:
        A ``(key, value)`` tuple with both items encoded. A ``None`` value is
        passed through unchanged.
    """
    # Unpack in the body instead of the signature: tuple parameters are not
    # valid syntax on Python 3 (PEP 3113), while this form works everywhere.
    key, value = pair
    encoded_value = value.encode(encoding) if value is not None else value
    return (key.encode(encoding), encoded_value)
def encode_dict(d, encoding='utf-8'):
    """Return a new dict with every key and value of ``d`` encoded.

    Each item is converted with ``encode_tuple``, so ``None`` values are kept
    as ``None``.
    """
    encoded_items = (encode_tuple(item, encoding=encoding)
                     for item in d.iteritems())
    return dict(encoded_items)
def decode_tuple(pair, encoding='utf-8'):
    """Decode both items of a ``(key, value)`` pair.

    Args:
        pair: Two-item sequence ``(key, value)``. ``key`` must provide a
            ``decode`` method; ``value`` may be ``None``.
        encoding: Codec name handed to ``decode``.

    Returns:
        A ``(key, value)`` tuple with both items decoded. A ``None`` value is
        passed through unchanged.
    """
    # Unpack in the body instead of the signature: tuple parameters are not
    # valid syntax on Python 3 (PEP 3113), while this form works everywhere.
    key, value = pair
    decoded_value = value.decode(encoding) if value is not None else value
    return (key.decode(encoding), decoded_value)
def decode_dict(d, encoding='utf-8'):
    """Return a new dict with every key and value of ``d`` decoded.

    Each item is converted with ``decode_tuple``, so ``None`` values are kept
    as ``None``.
    """
    decoded_items = (decode_tuple(item, encoding=encoding)
                     for item in d.iteritems())
    return dict(decoded_items)
def db_translations():
    """Yield one translation record per translatable field of each checklist item.

    Reads the JSON export at ``JSON_DB_FILENAME`` and walks the list stored
    under ``db['checklists']['basic']``.

    Yields:
        dict: A record with the keys
            * ``'location'``: always ``'db'``;
            * ``'field'``: ``'[<index>]<field>'``, the item's position in the
              list followed by the field name;
            * ``DEFAULT_LANGUAGE``: the item's own value for the field;
            * one key per entry of ``LOCALES``: the value found under
              ``item['alt'][locale][field]``, or ``None`` when any level of
              that path is missing.

    Raises:
        KeyError: If an item lacks one of ``TRANSLATABLE_FIELDS``.
    """
    with open(JSON_DB_FILENAME) as infile:
        # Parse the handle managed by the `with` statement; opening the file a
        # second time here would leave that extra handle unclosed.
        db = json.load(infile)
    for index, item in enumerate(db['checklists']['basic']):
        # 'alt' is optional on an item, hence the chained .get() lookups.
        alternates = item.get('alt', {})
        for field in TRANSLATABLE_FIELDS:
            record = {
                'location': 'db',
                'field': '[{}]{}'.format(index, field),
                DEFAULT_LANGUAGE: item[field],
            }
            for locale in LOCALES:
                record[locale] = alternates.get(locale, {}).get(field, None)
            yield record
def strings_xml_filenames():
    """Yield the path of every file below ``app/src/main/res`` whose name
    matches ``STRINGS_FILE_RE``."""
    for dirpath, _subdirs, filenames in os.walk('app/src/main/res'):
        for name in filenames:
            if not STRINGS_FILE_RE.match(name):
                continue
            yield os.path.join(dirpath, name)
def language_from_filename(filename):
    """Return the language encoded in the name of the file's parent directory.

    The parent directory name is split on ``'-'`` and the second piece is the
    language (``'values-ru'`` gives ``'ru'``). A directory name without any
    ``'-'`` yields ``DEFAULT_LANGUAGE``.
    """
    parent_name = os.path.basename(os.path.dirname(filename))
    pieces = parent_name.split('-')
    if len(pieces) > 1:
        return pieces[1]
    return DEFAULT_LANGUAGE
def filename_without_locale(filename):
    """Replace the file's parent directory with the plain ``values`` directory.

    For example 'res/values-ru/strings.xml' becomes 'res/values/strings.xml'.
    """
    values_dir = os.path.dirname(filename)
    res_dir = os.path.dirname(values_dir)
    base_name = os.path.basename(filename)
    return os.path.join(res_dir, 'values', base_name)
def translation_to_filename_with_locale(translation, locale):
    """Return the locale-specific resource path for a translation record.

    The record's ``'location'`` path has its parent directory replaced by
    ``values`` when ``locale`` is ``DEFAULT_LANGUAGE`` and by
    ``values-<locale>`` otherwise, e.g. ``res/values-{LOCALE}/strings.xml``.
    """
    location = translation['location']
    res_dir = os.path.dirname(os.path.dirname(location))
    if locale == DEFAULT_LANGUAGE:
        values_dir = 'values'
    else:
        values_dir = 'values-' + locale
    return os.path.join(res_dir, values_dir, os.path.basename(location))
def language_translation_from_filename(filename):
    """Yield a single-language translation record for each ``<string>`` element.

    Args:
        filename: Path of a strings XML file. Its parent directory name
            determines the language (see ``language_from_filename``).

    Yields:
        dict: A record with the keys
            * ``'location'``: ``filename`` mapped to the plain ``values``
              directory (see ``filename_without_locale``);
            * ``'field'``: the element's ``name`` attribute;
            * the file's language: the element's text, including any nested
              markup, with HTML entities unescaped.

    Raises:
        KeyError: If a ``<string>`` element has no ``name`` attribute.
    """
    # These values do not change per element, so compute them once up front.
    language = language_from_filename(filename)
    location = filename_without_locale(filename)
    # Only the parser's unescape() helper is used; one instance serves all
    # elements.
    html_parser = HTMLParser.HTMLParser()
    for _event, element in xml.etree.ElementTree.iterparse(filename):
        if element.tag != 'string':
            continue
        # A <string> element may contain markup elements, so its 'text' alone
        # is not enough: the serialized children are appended to it.
        text = u'' + (element.text or u'')
        text += u''.join(xml.etree.ElementTree.tostring(child)
                         for child in element)
        # etree escapes non-ASCII characters when serializing, so the result
        # has to be unescaped again.
        text = html_parser.unescape(text)
        yield {
            'location': location,
            'field': element.attrib['name'],
            language: text,
        }
def code_translations():
    """Combine the per-language records of all strings XML files.

    Records sharing the same ``'location'`` and ``'field'`` are merged into a
    single dict, so each returned record carries one key per language in which
    the string was found.

    Returns:
        The merged records (the values of the internal mapping).
    """
    merged = collections.defaultdict(dict)
    for path in strings_xml_filenames():
        for record in language_translation_from_filename(path):
            merge_key = record['location'] + record['field']
            merged[merge_key].update(record)
    return merged.values()
def translations_to_csv(translations, outfile):
    """Write translation records to ``outfile`` as CSV with a header row.

    The columns are ``location``, ``field``, ``DEFAULT_LANGUAGE`` and then
    every entry of ``LOCALES``. Each record is passed through ``encode_dict``
    before it is written.
    """
    columns = ('location', 'field', DEFAULT_LANGUAGE) + LOCALES
    csv_writer = csv.DictWriter(outfile, fieldnames=columns)
    csv_writer.writeheader()
    for record in translations:
        encoded_record = encode_dict(record)
        csv_writer.writerow(encoded_record)
def csv_to_translations(infile):
    """Return a generator of translation records read from a CSV stream.

    Every row produced by ``csv.DictReader`` is decoded with ``decode_dict``,
    using ``infile.encoding`` when it is set and ``'utf-8'`` otherwise.
    """
    rows = csv.DictReader(infile)
    return (decode_dict(row, encoding=infile.encoding or 'utf-8')
            for row in rows)
def group_translations_by_file(translations):
    """Group translation records by the file they have to be written to.

    Records whose ``'location'`` is ``'db'`` are collected under the key
    ``'db'``. Every other record is appended once for each locale key it
    contains (``LOCALES`` plus ``DEFAULT_LANGUAGE``), under that locale's
    resource file path.

    Returns:
        dict: Maps ``'db'`` or a resource file path to a list of records.
    """
    grouped = collections.defaultdict(list)
    candidate_locales = LOCALES + (DEFAULT_LANGUAGE,)
    for record in translations:
        location = record['location']
        if location == 'db':
            grouped[location].append(record)
            continue
        for locale in candidate_locales:
            if locale not in record:
                continue
            target = translation_to_filename_with_locale(record, locale)
            grouped[target].append(record)
    return dict(grouped.iteritems())
def update_db_object_with_translations(db, translations):
    """Return a copy of ``db`` with the given translation records applied.

    Args:
        db: Parsed database object; the checklist items are expected under
            ``db['checklists']['basic']``. It is deep-copied, never mutated.
        translations: Iterable of records whose ``'field'`` has the form
            ``'[<index>]<field>'`` and which carry a ``DEFAULT_LANGUAGE`` key
            and, optionally, one key per entry of ``LOCALES``.

    Returns:
        The updated deep copy of ``db``.

    Raises:
        KeyError: If a record lacks ``'field'`` or ``DEFAULT_LANGUAGE``.
        IndexError: If ``'field'`` contains no ``']'`` or the index is out of
            range.
        ValueError: If the index part of ``'field'`` is not an integer.
    """
    updated = copy.deepcopy(db)
    checklist = updated['checklists']['basic']
    for translation in translations:
        # 'field' looks like '[3]name': the item index, then the field name.
        raw_field = translation['field']
        index = int(raw_field.lstrip('[').split(']')[0])
        field = raw_field.split(']')[1]
        item = checklist[index]
        item[field] = translation[DEFAULT_LANGUAGE]
        for locale in LOCALES:
            text = translation.get(locale)
            # Skip locales without a translation. Besides '', this covers a
            # None value, which must not be stored as a translation.
            if not text:
                continue
            # An item is not guaranteed to have an 'alt' mapping yet, so
            # create it (and the per-locale mapping) on demand.
            alternates = item.setdefault('alt', {})
            alternates.setdefault(locale, {})[field] = text
    return updated
def update_db_with_translations(translations):
    """Apply translation records to the JSON file at ``JSON_DB_FILENAME``.

    The file is loaded with ``collections.OrderedDict`` as the object hook so
    key order survives the round trip, updated through
    ``update_db_object_with_translations`` and written back in place.
    """
    with open(JSON_DB_FILENAME) as infile:
        current = json.load(infile, object_pairs_hook=collections.OrderedDict)
    updated = update_db_object_with_translations(current, translations)
    with open(JSON_DB_FILENAME, 'w') as outfile:
        json.dump(
            updated,
            outfile,
            separators=(',', ': '),
            indent=2,
            encoding='utf-8',
            ensure_ascii=False)
# a better programmer would break this up into logical pieces of work, but xml
# is such a bummer, so this is a big hack.
def update_xml_with_translations(filename, translations):
    """Rewrite a strings XML file with the given translations merged in.

    The existing ``<string>`` elements are read in document order, values from
    ``translations`` for this file's locale replace or extend them, and the
    file is written back as a flat ``<resources>`` list.

    Args:
        filename: Path of the strings XML file to rewrite in place. Its parent
            directory name determines the locale (see
            ``language_from_filename``).
        translations: Iterable of records with a ``'field'`` key (the string
            name) and, optionally, a value under the locale key. Records with
            a missing, ``None`` or empty value for the locale are ignored.
    """
    locale = language_from_filename(filename)
    translation_map = collections.OrderedDict()
    header = ''
    with open(filename) as infile:
        for _event, element in xml.etree.ElementTree.iterparse(infile):
            if element.tag == 'string':
                # Keep nested markup by appending the serialized children.
                text = element.text or ''
                text += ''.join(xml.etree.ElementTree.tostring(e)
                                for e in element)
                translation_map[element.attrib['name']] = text
        # The file was read to its end above; rewind before parsing it again.
        infile.seek(0)
        first_node = xml.dom.minidom.parse(infile).childNodes[0]
        # Preserve a leading comment (presumably the copyright notice). If the
        # document starts with anything else, e.g. the <resources> element
        # itself, copying it would duplicate content in the output.
        if first_node.nodeType == xml.dom.Node.COMMENT_NODE:
            header = first_node.toxml().encode('utf-8') + '\n\n'
    for translation in translations:
        value = translation.get(locale)
        # Ignore absent translations; a None value must not reach encode().
        if value:
            translation_map[translation['field']] = value
    with open(filename, 'w') as outfile:
        outfile.write(header + '<resources>\n')
        for key, value in translation_map.iteritems():
            value = value.encode('utf-8')
            outfile.write(' <string name="{}">{}</string>\n'.format(key, value))
        outfile.write('</resources>\n')
def strings_to_csv(outfile=sys.stdout):
    """Write all translations as CSV to ``outfile``: the records from the
    strings XML files first, then the records from the database."""
    all_translations = itertools.chain(code_translations(), db_translations())
    translations_to_csv(all_translations, outfile=outfile)
def csv_to_strings(infile=sys.stdin):
    """Read translations as CSV from ``infile`` and write them back to their
    sources.

    Records are grouped by target. The ``'db'`` group is re-encoded with
    ``encode_dict`` and applied to the JSON database; every other group is
    applied to the strings XML file it is keyed by.
    """
    grouped = group_translations_by_file(csv_to_translations(infile))
    for target, records in grouped.iteritems():
        if target != 'db':
            update_xml_with_translations(target, records)
            continue
        update_db_with_translations(map(encode_dict, records))
if __name__ == '__main__':
    # Command-line entry point: the single positional argument selects the
    # conversion direction. The module docstring doubles as the help text and
    # is printed verbatim.
    arg_parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    arg_parser.add_argument('action', choices=('tostrings', 'tocsv'))
    options = arg_parser.parse_args()
    if options.action == 'tocsv':
        strings_to_csv()
    else:
        csv_to_strings()