Skip to content

Commit cb7412f

Browse files
committed
syntax: fix empty char class bug in HIR printer
When a character class is empty, the HIR printer would emit '[]', which is not a valid regex. (If a ']' immediately follows an opening '[', the ']' is interpreted literally rather than as a closing bracket.) Instead, we now write '[a&&b]'. We could also emit something like '(?u:\P{any})' or '(?-u:[^\x00-\xFF])', but '[a&&b]' doesn't require any flags and is also really obvious: the intersection of two distinct characters is necessarily empty.
1 parent cd1c46d commit cb7412f

File tree

1 file changed

+11
-0
lines changed

1 file changed

+11
-0
lines changed

regex-syntax/src/hir/print.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,9 @@ impl<W: fmt::Write> Visitor for Writer<W> {
125125
}
126126
}
127127
HirKind::Class(hir::Class::Unicode(ref cls)) => {
128+
if cls.ranges().is_empty() {
129+
return self.wtr.write_str("[a&&b]");
130+
}
128131
self.wtr.write_str("[")?;
129132
for range in cls.iter() {
130133
if range.start() == range.end() {
@@ -143,6 +146,9 @@ impl<W: fmt::Write> Visitor for Writer<W> {
143146
self.wtr.write_str("]")?;
144147
}
145148
HirKind::Class(hir::Class::Bytes(ref cls)) => {
149+
if cls.ranges().is_empty() {
150+
return self.wtr.write_str("[a&&b]");
151+
}
146152
self.wtr.write_str("(?-u:[")?;
147153
for range in cls.iter() {
148154
if range.start() == range.end() {
@@ -359,6 +365,11 @@ mod tests {
359365
roundtrip_bytes(r"(?-u)[\[]", r"\[");
360366
roundtrip_bytes(r"(?-u)[Z-_]", r"(?-u:[Z-_])");
361367
roundtrip_bytes(r"(?-u)[Z-_--Z]", r"(?-u:[\[-_])");
368+
369+
// This tests that an empty character class is correctly roundtripped.
370+
#[cfg(feature = "unicode-gencat")]
371+
roundtrip(r"\P{any}", r"[a&&b]");
372+
roundtrip_bytes(r"(?-u)[^\x00-\xFF]", r"[a&&b]");
362373
}
363374

364375
#[test]

0 commit comments

Comments
 (0)