Skip to content

Commit

Permalink
Run black
Browse files Browse the repository at this point in the history
  • Loading branch information
Kijewski committed Jun 23, 2023
1 parent 395daa0 commit 3480c48
Show file tree
Hide file tree
Showing 12 changed files with 392 additions and 332 deletions.
74 changes: 39 additions & 35 deletions make_decoder_recursive_select.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,54 +8,58 @@


def generate(out):
lst = ['DRS_fail'] * 128
lst[ord('n')] = 'DRS_null'
lst[ord('t')] = 'DRS_true'
lst[ord('f')] = 'DRS_false'
lst[ord('I')] = 'DRS_inf'
lst[ord('N')] = 'DRS_nan'
lst[ord('"')] = 'DRS_string'
lst[ord("'")] = 'DRS_string'
lst[ord('{')] = 'DRS_recursive'
lst[ord('[')] = 'DRS_recursive'
for c in '+-.0123456789':
lst[ord(c)] = 'DRS_number'

print('#ifndef JSON5EncoderCpp_decoder_recursive_select', file=out)
print('#define JSON5EncoderCpp_decoder_recursive_select', file=out)
lst = ["DRS_fail"] * 128
lst[ord("n")] = "DRS_null"
lst[ord("t")] = "DRS_true"
lst[ord("f")] = "DRS_false"
lst[ord("I")] = "DRS_inf"
lst[ord("N")] = "DRS_nan"
lst[ord('"')] = "DRS_string"
lst[ord("'")] = "DRS_string"
lst[ord("{")] = "DRS_recursive"
lst[ord("[")] = "DRS_recursive"
for c in "+-.0123456789":
lst[ord(c)] = "DRS_number"

print("#ifndef JSON5EncoderCpp_decoder_recursive_select", file=out)
print("#define JSON5EncoderCpp_decoder_recursive_select", file=out)
print(file=out)
print('// GENERATED FILE', file=out)
print('// All changes will be lost.', file=out)
print("// GENERATED FILE", file=out)
print("// All changes will be lost.", file=out)
print(file=out)
print('#include <cstdint>', file=out)
print("#include <cstdint>", file=out)
print(file=out)
print('namespace JSON5EncoderCpp {', file=out)
print('inline namespace {', file=out)
print("namespace JSON5EncoderCpp {", file=out)
print("inline namespace {", file=out)
print(file=out)
print('enum DrsKind : std::uint8_t {', file=out)
print(' DRS_fail, DRS_null, DRS_true, DRS_false, DRS_inf, DRS_nan, DRS_string, DRS_number, DRS_recursive', file=out)
print('};', file=out)
print("enum DrsKind : std::uint8_t {", file=out)
print(
" DRS_fail, DRS_null, DRS_true, DRS_false, DRS_inf, DRS_nan, DRS_string, DRS_number, DRS_recursive",
file=out,
)
print("};", file=out)
print(file=out)
print('static const DrsKind drs_lookup[128] = {', file=out)
print("static const DrsKind drs_lookup[128] = {", file=out)
for chunk in chunked(lst, 8):
print(' ', end='', file=out)
print(" ", end="", file=out)
for t in chunk:
print(' ', t, ',', sep='', end='', file=out)
print(" ", t, ",", sep="", end="", file=out)
print(file=out)
print('};', file=out)
print("};", file=out)
print(file=out)
print('} // anonymous inline namespace', sep='', file=out)
print('} // namespace JSON5EncoderCpp', sep='', file=out)
print("} // anonymous inline namespace", sep="", file=out)
print("} // namespace JSON5EncoderCpp", sep="", file=out)
print(file=out)
print('#endif', sep='', file=out)
print("#endif", sep="", file=out)


argparser = ArgumentParser(description='Generate src/_decoder_recursive_select.hpp')
argparser.add_argument('input', nargs='?', type=Path, default=Path('src/_decoder_recursive_select.hpp'))
argparser = ArgumentParser(description="Generate src/_decoder_recursive_select.hpp")
argparser.add_argument(
"input", nargs="?", type=Path, default=Path("src/_decoder_recursive_select.hpp")
)

if __name__ == '__main__':
if __name__ == "__main__":
basicConfig(level=DEBUG)
args = argparser.parse_args()
with open(str(args.input.resolve()), 'wt') as out:
with open(str(args.input.resolve()), "wt") as out:
generate(out)

73 changes: 41 additions & 32 deletions make_escape_dct.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,47 +7,56 @@

def generate(f):
unescaped = 0
print('const EscapeDct::Items EscapeDct::items = {', file=f)
print("const EscapeDct::Items EscapeDct::items = {", file=f)
for c in range(0x100):
if c == ord('\\'):
s = '\\\\'
elif c == ord('\b'):
s = '\\b'
elif c == ord('\f'):
s = '\\f'
elif c == ord('\n'):
s = '\\n'
elif c == ord('\r'):
s = '\\r'
elif c == ord('\t'):
s = '\\t'
if c == ord("\\"):
s = "\\\\"
elif c == ord("\b"):
s = "\\b"
elif c == ord("\f"):
s = "\\f"
elif c == ord("\n"):
s = "\\n"
elif c == ord("\r"):
s = "\\r"
elif c == ord("\t"):
s = "\\t"
elif c == ord('"'):
s = '\\"'
elif (c < 0x20) or (c >= 0x7f) or (chr(c) in "'&<>\\"):
s = f'\\u{c:04x}'
elif (c < 0x20) or (c >= 0x7F) or (chr(c) in "'&<>\\"):
s = f"\\u{c:04x}"
else:
s = f'{c:c}'
s = f"{c:c}"
if c < 128:
unescaped |= 1 << c

t = [str(len(s))] + [
f"'{c}'" if c != '\\' else f"'\\\\'"
for c in s
] + ['0'] * 6
l = ', '.join(t[:8])
print(f' {{ {l:35s} }}, /* 0x{c:02x} {chr(c)!r} */', file=f)
print('};', file=f)
t = (
[str(len(s))]
+ [f"'{c}'" if c != "\\" else f"'\\\\'" for c in s]
+ ["0"] * 6
)
l = ", ".join(t[:8])
print(f" {{ {l:35s} }}, /* 0x{c:02x} {chr(c)!r} */", file=f)
print("};", file=f)

escaped = unescaped ^ ((1 << 128) - 1)
print(f'const std::uint64_t EscapeDct::is_escaped_lo = UINT64_C(0x{(escaped & ((1 << 64) - 1)):016x});', file=f)
print(f'const std::uint64_t EscapeDct::is_escaped_hi = UINT64_C(0x{(escaped >> 64):016x});', file=f)


argparser = ArgumentParser(description='Generate src/_escape_dct.hpp')
argparser.add_argument('input', nargs='?', type=Path, default=Path('src/_escape_dct.hpp'))

if __name__ == '__main__':
print(
f"const std::uint64_t EscapeDct::is_escaped_lo = UINT64_C(0x{(escaped & ((1 << 64) - 1)):016x});",
file=f,
)
print(
f"const std::uint64_t EscapeDct::is_escaped_hi = UINT64_C(0x{(escaped >> 64):016x});",
file=f,
)


argparser = ArgumentParser(description="Generate src/_escape_dct.hpp")
argparser.add_argument(
"input", nargs="?", type=Path, default=Path("src/_escape_dct.hpp")
)

if __name__ == "__main__":
basicConfig(level=DEBUG)
args = argparser.parse_args()
with open(str(args.input.resolve()), 'wt') as out:
with open(str(args.input.resolve()), "wt") as out:
generate(out)
116 changes: 65 additions & 51 deletions make_unicode_categories.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,26 +16,24 @@ def main(input_file, output_file):
IdentifierPart = 3

cat_indices = {
'zs': WhiteSpace,

'lc': IdentifierStart,
'll': IdentifierStart,
'lm': IdentifierStart,
'lo': IdentifierStart,
'lt': IdentifierStart,
'lu': IdentifierStart,
'nl': IdentifierStart,

'mc': IdentifierPart,
'mn': IdentifierPart,
'pc': IdentifierPart,
'nd': IdentifierPart,
"zs": WhiteSpace,
"lc": IdentifierStart,
"ll": IdentifierStart,
"lm": IdentifierStart,
"lo": IdentifierStart,
"lt": IdentifierStart,
"lu": IdentifierStart,
"nl": IdentifierStart,
"mc": IdentifierPart,
"mn": IdentifierPart,
"pc": IdentifierPart,
"nd": IdentifierPart,
}

planes = defaultdict(lambda: [0] * 0x10000)

for input_line in input_file:
m = match(r'^([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s+;\s+([A-Z][a-z])', input_line)
m = match(r"^([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s+;\s+([A-Z][a-z])", input_line)
if not m:
continue
start, end, cat = m.groups()
Expand All @@ -48,75 +46,91 @@ def main(input_file, output_file):
planes[i // 0x10000][i % 0x10000] = idx

# per: https://spec.json5.org/#white-space
for i in (0x9, 0xa, 0xb, 0xc, 0xd, 0x20, 0xa0, 0x2028, 0x2028, 0x2029, 0xfeff):
for i in (0x9, 0xA, 0xB, 0xC, 0xD, 0x20, 0xA0, 0x2028, 0x2028, 0x2029, 0xFEFF):
planes[0][i] = WhiteSpace

# per: https://www.ecma-international.org/ecma-262/5.1/#sec-7.6
for i in (ord('$'), ord('_'), ord('\\')):
for i in (ord("$"), ord("_"), ord("\\")):
planes[0][i] = IdentifierStart

# per: https://www.ecma-international.org/ecma-262/5.1/#sec-7.6
for i in (0x200C, 0x200D):
planes[0][i] = IdentifierPart

print('#ifndef JSON5EncoderCpp_unicode_cat_of', file=output_file)
print('#define JSON5EncoderCpp_unicode_cat_of', file=output_file)
print("#ifndef JSON5EncoderCpp_unicode_cat_of", file=output_file)
print("#define JSON5EncoderCpp_unicode_cat_of", file=output_file)
print(file=output_file)
print('// GENERATED FILE', file=output_file)
print('// All changes will be lost.', file=output_file)
print("// GENERATED FILE", file=output_file)
print("// All changes will be lost.", file=output_file)
print(file=output_file)
print('#include <cstdint>', file=output_file)
print("#include <cstdint>", file=output_file)
print(file=output_file)
print('namespace JSON5EncoderCpp {', file=output_file)
print('inline namespace {', file=output_file)
print("namespace JSON5EncoderCpp {", file=output_file)
print("inline namespace {", file=output_file)
print(file=output_file)
print('static unsigned unicode_cat_of(std::uint32_t codepoint) {', file=output_file)
print("static unsigned unicode_cat_of(std::uint32_t codepoint) {", file=output_file)

print(' static std::uint8_t plane_X[0x10000 / 4] = {0};', file=output_file)
print(" static std::uint8_t plane_X[0x10000 / 4] = {0};", file=output_file)
print(file=output_file)

for plane_idx, plane_data in planes.items():
print(' static std::uint8_t plane_' + str(plane_idx) + '[0x10000 / 4] = {', file=output_file)
for chunk in chunked(plane_data, 4*16):
print(' ', end='', file=output_file)
print(
" static std::uint8_t plane_" + str(plane_idx) + "[0x10000 / 4] = {",
file=output_file,
)
for chunk in chunked(plane_data, 4 * 16):
print(" ", end="", file=output_file)
for value in chunked(chunk, 4):
value = reduce(lambda a, i: ((a << 2) | i), reversed(value), 0)
print('0x{:02x}u'.format(value), end=', ', file=output_file)
print("0x{:02x}u".format(value), end=", ", file=output_file)
print(file=output_file)
print(' };', file=output_file)
print(" };", file=output_file)
print(file=output_file)

print(' static std::uint8_t *planes[17] = {', end='', file=output_file)
print(" static std::uint8_t *planes[17] = {", end="", file=output_file)
for plane_idx in range(0, 17):
if plane_idx % 8 == 0:
print('\n ', end='', file=output_file)
print("\n ", end="", file=output_file)
if plane_idx in planes:
print('plane_' + str(plane_idx) + ', ', end='', file=output_file)
print("plane_" + str(plane_idx) + ", ", end="", file=output_file)
else:
print('plane_X, ', end='', file=output_file)
print("plane_X, ", end="", file=output_file)
print(file=output_file)
print(' };', file=output_file)
print(" };", file=output_file)
print(file=output_file)

print(' std::uint16_t plane_idx = std::uint16_t(codepoint / 0x10000);', file=output_file)
print(' if (JSON5EncoderCpp_expect(plane_idx > 16, false)) return 1;', file=output_file)
print(' std::uint16_t datum_idx = std::uint16_t(codepoint & 0xffff);', file=output_file)
print(' const std::uint8_t *plane = planes[plane_idx];', file=output_file)
print(' return (plane[datum_idx / 4] >> (2 * (datum_idx % 4))) % 4;', file=output_file)
print('}', file=output_file)
print(
" std::uint16_t plane_idx = std::uint16_t(codepoint / 0x10000);",
file=output_file,
)
print(
" if (JSON5EncoderCpp_expect(plane_idx > 16, false)) return 1;",
file=output_file,
)
print(
" std::uint16_t datum_idx = std::uint16_t(codepoint & 0xffff);",
file=output_file,
)
print(" const std::uint8_t *plane = planes[plane_idx];", file=output_file)
print(
" return (plane[datum_idx / 4] >> (2 * (datum_idx % 4))) % 4;",
file=output_file,
)
print("}", file=output_file)
print(file=output_file)
print('}', file=output_file)
print('}', file=output_file)
print("}", file=output_file)
print("}", file=output_file)
print(file=output_file)
print('#endif', file=output_file)
print("#endif", file=output_file)


argparser = ArgumentParser(description='Generate Unicode Category Matcher(s)')
argparser.add_argument('input', nargs='?', type=Path, default=Path('/dev/stdin'))
argparser.add_argument('output', nargs='?', type=Path, default=Path('/dev/stdout'))
argparser = ArgumentParser(description="Generate Unicode Category Matcher(s)")
argparser.add_argument("input", nargs="?", type=Path, default=Path("/dev/stdin"))
argparser.add_argument("output", nargs="?", type=Path, default=Path("/dev/stdout"))

if __name__ == '__main__':
if __name__ == "__main__":
args = argparser.parse_args()
with open(str(args.input.resolve()), 'rt') as input_file, \
open(str(args.output.resolve()), 'wt') as output_file:
with open(str(args.input.resolve()), "rt") as input_file, open(
str(args.output.resolve()), "wt"
) as output_file:
raise SystemExit(main(input_file, output_file))
1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,5 @@
requires = [
"Cython",
"setuptools",
"wheel",
]
build-backend = "setuptools.build_meta"
2 changes: 2 additions & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
black
build
colorama == 0.4.*
cython == 0.*
more_itertools == 8.*
mypy
setuptools
wheel
Loading

0 comments on commit 3480c48

Please sign in to comment.