From 75c9d5e3fd515d8332fdf0118b432b9a2b868b0c Mon Sep 17 00:00:00 2001 From: Mingun Date: Sun, 12 Nov 2023 15:39:54 +0500 Subject: [PATCH 1/6] Fix return type of XmlSource::read_bytes_until - we should know if byte was found or not Co-authored-by: Daniel Alley --- src/reader/async_tokio.rs | 2 +- src/reader/buffered_reader.rs | 8 +--- src/reader/mod.rs | 79 +++++++++++++++++------------------ src/reader/slice_reader.rs | 13 +++--- src/reader/state.rs | 10 ++++- 5 files changed, 55 insertions(+), 57 deletions(-) diff --git a/src/reader/async_tokio.rs b/src/reader/async_tokio.rs index 207c1b64..1cdab220 100644 --- a/src/reader/async_tokio.rs +++ b/src/reader/async_tokio.rs @@ -4,13 +4,13 @@ use tokio::io::{self, AsyncBufRead, AsyncBufReadExt}; +use crate::errors::{Error, Result, SyntaxError}; use crate::events::Event; use crate::name::{QName, ResolveResult}; use crate::reader::buffered_reader::impl_buffered_source; use crate::reader::{ is_whitespace, BangType, NsReader, ParseState, ReadElementState, Reader, Span, }; -use crate::{Error, Result}; /// A struct for read XML asynchronously from an [`AsyncBufRead`]. /// diff --git a/src/reader/buffered_reader.rs b/src/reader/buffered_reader.rs index a614d603..593d4eba 100644 --- a/src/reader/buffered_reader.rs +++ b/src/reader/buffered_reader.rs @@ -54,7 +54,7 @@ macro_rules! impl_buffered_source { byte: u8, buf: &'b mut Vec, position: &mut usize, - ) -> Result> { + ) -> Result<(&'b [u8], bool)> { // search byte must be within the ascii range debug_assert!(byte.is_ascii()); @@ -90,11 +90,7 @@ macro_rules! impl_buffered_source { } *position += read; - if read == 0 { - Ok(None) - } else { - Ok(Some(&buf[start..])) - } + Ok((&buf[start..], done)) } $($async)? fn read_bang_element $(<$lf>)? ( diff --git a/src/reader/mod.rs b/src/reader/mod.rs index 48c16c75..d1fe085d 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -284,8 +284,7 @@ macro_rules! read_until_open { $(.$await)? { // Return Text event with `bytes` content - Ok(Some(bytes)) => $self.state.emit_text(bytes).map(Ok), - Ok(None) => Ok(Ok(Event::Eof)), + Ok((bytes, _found)) => $self.state.emit_text(bytes).map(Ok), Err(e) => Err(e), } }}; @@ -334,8 +333,8 @@ macro_rules! read_until_close { .read_bytes_until(b'>', $buf, &mut $self.state.offset) $(.$await)? { - Ok(None) => Ok(Event::Eof), - Ok(Some(bytes)) => $self.state.emit_end(bytes), + Ok((bytes, true)) => $self.state.emit_end(bytes), + Ok((_, false)) => Err(Error::Syntax(SyntaxError::UnclosedTag)), Err(e) => Err(e), }, // `', $buf, &mut $self.state.offset) $(.$await)? { - Ok(None) => Ok(Event::Eof), - Ok(Some(bytes)) => $self.state.emit_question_mark(bytes), + Ok((bytes, true)) => $self.state.emit_question_mark(bytes), + Ok((_, false)) => Err(Error::Syntax(SyntaxError::UnclosedPIOrXmlDecl)), Err(e) => Err(e), }, // `<...` - opening or self-closed tag @@ -741,8 +740,8 @@ trait XmlSource<'r, B> { /// Read input until `byte` is found or end of input is reached. /// - /// Returns a slice of data read up to `byte`, which does not include into result. - /// If input (`Self`) is exhausted, returns `None`. + /// Returns a slice of data read up to `byte` (exclusive), + /// and a flag noting whether `byte` was found in the input or not. /// /// # Example /// @@ -753,7 +752,7 @@ trait XmlSource<'r, B> { /// /// assert_eq!( /// input.read_bytes_until(b'*', (), &mut position).unwrap(), - /// Some(b"abc".as_ref()) + /// (b"abc".as_ref(), true) /// ); /// assert_eq!(position, 4); // position after the symbol matched /// ``` @@ -770,7 +769,7 @@ trait XmlSource<'r, B> { byte: u8, buf: B, position: &mut usize, - ) -> Result>; + ) -> Result<(&'r [u8], bool)>; /// Read input until comment, CDATA or processing instruction is finished. /// @@ -998,13 +997,13 @@ mod test { let mut input = b"".as_ref(); // ^= 0 + let (bytes, found) = $source(&mut input) + .read_bytes_until(b'*', buf, &mut position) + $(.$await)? + .unwrap(); assert_eq!( - $source(&mut input) - .read_bytes_until(b'*', buf, &mut position) - $(.$await)? - .unwrap() - .map(Bytes), - None + (Bytes(bytes), found), + (Bytes(b""), false) ); assert_eq!(position, 0); } @@ -1018,13 +1017,13 @@ mod test { let mut input = b"abcdef".as_ref(); // ^= 6 + let (bytes, found) = $source(&mut input) + .read_bytes_until(b'*', buf, &mut position) + $(.$await)? + .unwrap(); assert_eq!( - $source(&mut input) - .read_bytes_until(b'*', buf, &mut position) - $(.$await)? - .unwrap() - .map(Bytes), - Some(Bytes(b"abcdef")) + (Bytes(bytes), found), + (Bytes(b"abcdef"), false) ); assert_eq!(position, 6); } @@ -1039,13 +1038,13 @@ mod test { let mut input = b"*abcdef".as_ref(); // ^= 1 + let (bytes, found) = $source(&mut input) + .read_bytes_until(b'*', buf, &mut position) + $(.$await)? + .unwrap(); assert_eq!( - $source(&mut input) - .read_bytes_until(b'*', buf, &mut position) - $(.$await)? - .unwrap() - .map(Bytes), - Some(Bytes(b"")) + (Bytes(bytes), found), + (Bytes(b""), true) ); assert_eq!(position, 1); // position after the symbol matched } @@ -1060,13 +1059,13 @@ mod test { let mut input = b"abc*def".as_ref(); // ^= 4 + let (bytes, found) = $source(&mut input) + .read_bytes_until(b'*', buf, &mut position) + $(.$await)? + .unwrap(); assert_eq!( - $source(&mut input) - .read_bytes_until(b'*', buf, &mut position) - $(.$await)? - .unwrap() - .map(Bytes), - Some(Bytes(b"abc")) + (Bytes(bytes), found), + (Bytes(b"abc"), true) ); assert_eq!(position, 4); // position after the symbol matched } @@ -1081,13 +1080,13 @@ mod test { let mut input = b"abcdef*".as_ref(); // ^= 7 + let (bytes, found) = $source(&mut input) + .read_bytes_until(b'*', buf, &mut position) + $(.$await)? + .unwrap(); assert_eq!( - $source(&mut input) - .read_bytes_until(b'*', buf, &mut position) - $(.$await)? - .unwrap() - .map(Bytes), - Some(Bytes(b"abcdef")) + (Bytes(bytes), found), + (Bytes(b"abcdef"), true) ); assert_eq!(position, 7); // position after the symbol matched } diff --git a/src/reader/slice_reader.rs b/src/reader/slice_reader.rs index 8f59ca04..976f709f 100644 --- a/src/reader/slice_reader.rs +++ b/src/reader/slice_reader.rs @@ -260,24 +260,21 @@ impl<'a> XmlSource<'a, ()> for &'a [u8] { byte: u8, _buf: (), position: &mut usize, - ) -> Result> { + ) -> Result<(&'a [u8], bool)> { // search byte must be within the ascii range debug_assert!(byte.is_ascii()); - if self.is_empty() { - return Ok(None); - } - Ok(Some(if let Some(i) = memchr::memchr(byte, self) { + if let Some(i) = memchr::memchr(byte, self) { *position += i + 1; let bytes = &self[..i]; *self = &self[i + 1..]; - bytes + Ok((bytes, true)) } else { *position += self.len(); let bytes = &self[..]; *self = &[]; - bytes - })) + Ok((bytes, false)) + } } fn read_bang_element( diff --git a/src/reader/state.rs b/src/reader/state.rs index 3371cf44..3597b3d3 100644 --- a/src/reader/state.rs +++ b/src/reader/state.rs @@ -49,12 +49,14 @@ pub(super) struct ReaderState { } impl ReaderState { - /// Trims whitespaces from `bytes`, if required, and returns a [`Text`] event. + /// Trims end whitespaces from `bytes`, if required, and returns a [`Text`] + /// event or an [`Eof`] event, if text after trimming is empty. /// /// # Parameters /// - `bytes`: data from the start of stream to the first `<` or from `>` to `<` /// /// [`Text`]: Event::Text + /// [`Eof`]: Event::Eof pub fn emit_text<'b>(&mut self, bytes: &'b [u8]) -> Result> { let mut content = bytes; @@ -67,7 +69,11 @@ impl ReaderState { content = &bytes[..len]; } - Ok(Event::Text(BytesText::wrap(content, self.decoder()))) + if content.is_empty() { + Ok(Event::Eof) + } else { + Ok(Event::Text(BytesText::wrap(content, self.decoder()))) + } } /// reads `BytesElement` starting with a `!`, From 8d14e318276eb8e496cef5967d30f985a3516bd3 Mon Sep 17 00:00:00 2001 From: Mingun Date: Sat, 11 Nov 2023 21:04:12 +0500 Subject: [PATCH 2/6] Add regression test for #622 failures: issue622 --- tests/issues.rs | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/tests/issues.rs b/tests/issues.rs index da9918d2..320b528f 100644 --- a/tests/issues.rs +++ b/tests/issues.rs @@ -4,11 +4,10 @@ use std::sync::mpsc; -use quick_xml::errors::{IllFormedError, SyntaxError}; +use quick_xml::errors::{Error, IllFormedError, SyntaxError}; use quick_xml::events::{BytesDecl, BytesStart, BytesText, Event}; use quick_xml::name::QName; use quick_xml::reader::Reader; -use quick_xml::Error; /// Regression test for https://github.com/tafia/quick-xml/issues/94 #[test] @@ -231,3 +230,19 @@ mod issue604 { assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Eof); } } + +/// Regression test for https://github.com/tafia/quick-xml/issues/622 +#[test] +fn issue622() { + let mut reader = Reader::from_str("><"); + reader.config_mut().trim_text(true); + + assert_eq!( + reader.read_event().unwrap(), + Event::Text(BytesText::from_escaped(">")) + ); + match reader.read_event() { + Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedTag), + x => panic!("Expected `Err(Syntax(_))`, but got `{:?}`", x), + } +} From 3b5d5b14ac53673ded64574d0ca58ca8511bd36d Mon Sep 17 00:00:00 2001 From: Mingun Date: Sun, 12 Nov 2023 20:34:16 +0500 Subject: [PATCH 3/6] Fix incorrect parsing of not closed tags --- Changelog.md | 3 +++ src/reader/mod.rs | 15 ++++++++++----- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/Changelog.md b/Changelog.md index dc4b422b..986b19fd 100644 --- a/Changelog.md +++ b/Changelog.md @@ -23,6 +23,8 @@ configuration is serializable. ### Bug Fixes +- [#622]: Fix wrong disregarding of not closed markup, such as lone `<`. + ### Misc Changes - [#675]: Minimum supported version of serde raised to 1.0.139 @@ -36,6 +38,7 @@ configuration is serializable. - `Error::UnexpectedToken` replaced by `IllFormedError::DoubleHyphenInComment` [#513]: https://github.com/tafia/quick-xml/issues/513 +[#622]: https://github.com/tafia/quick-xml/issues/622 [#675]: https://github.com/tafia/quick-xml/pull/675 [#677]: https://github.com/tafia/quick-xml/pull/677 diff --git a/src/reader/mod.rs b/src/reader/mod.rs index d1fe085d..b197e065 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -267,14 +267,13 @@ macro_rules! read_until_open { $read_event:ident $(, $await:ident)? ) => {{ - $self.state.state = ParseState::OpenedTag; - if $self.state.config.trim_text_start { $reader.skip_whitespace(&mut $self.state.offset) $(.$await)? ?; } // If we already at the `<` symbol, do not try to return an empty Text event if $reader.skip_one(b'<', &mut $self.state.offset) $(.$await)? ? { + $self.state.state = ParseState::OpenedTag; // Pass $buf to the next next iteration of parsing loop return Ok(Err($buf)); } @@ -283,8 +282,13 @@ macro_rules! read_until_open { .read_bytes_until(b'<', $buf, &mut $self.state.offset) $(.$await)? { - // Return Text event with `bytes` content - Ok((bytes, _found)) => $self.state.emit_text(bytes).map(Ok), + Ok((bytes, found)) => { + if found { + $self.state.state = ParseState::OpenedTag; + } + // Return Text event with `bytes` content or Eof if bytes is empty + $self.state.emit_text(bytes).map(Ok) + } Err(e) => Err(e), } }}; @@ -355,7 +359,8 @@ macro_rules! read_until_close { Ok(Some(bytes)) => $self.state.emit_start(bytes), Err(e) => Err(e), }, - Ok(None) => Ok(Event::Eof), + // `<` - syntax error, tag not closed + Ok(None) => Err(Error::Syntax(SyntaxError::UnclosedTag)), Err(e) => Err(e), } }}; From 79489b53910c7667be4eeedfee2e010b099edfe7 Mon Sep 17 00:00:00 2001 From: Mingun Date: Sun, 12 Nov 2023 01:02:52 +0500 Subject: [PATCH 4/6] Fix return type of XmlSource::read_element - it shouldn't return None, because we inside a tag An opened `<` character was already read when we call read_element, so we MUST to find `>` or return a syntax error --- src/reader/buffered_reader.rs | 8 ++--- src/reader/mod.rs | 63 ++++++++++++++++++----------------- src/reader/slice_reader.rs | 8 ++--- 3 files changed, 38 insertions(+), 41 deletions(-) diff --git a/src/reader/buffered_reader.rs b/src/reader/buffered_reader.rs index 593d4eba..a6f42121 100644 --- a/src/reader/buffered_reader.rs +++ b/src/reader/buffered_reader.rs @@ -7,7 +7,7 @@ use std::path::Path; use memchr; -use crate::errors::{Error, Result}; +use crate::errors::{Error, Result, SyntaxError}; use crate::events::Event; use crate::name::QName; use crate::reader::{is_whitespace, BangType, ReadElementState, Reader, Span, XmlSource}; @@ -151,7 +151,7 @@ macro_rules! impl_buffered_source { &mut self, buf: &'b mut Vec, position: &mut usize, - ) -> Result> { + ) -> Result<&'b [u8]> { let mut state = ReadElementState::Elem; let mut read = 0; @@ -187,9 +187,9 @@ macro_rules! impl_buffered_source { } if read == 0 { - Ok(None) + Err(Error::Syntax(SyntaxError::UnclosedTag)) } else { - Ok(Some(&buf[start..])) + Ok(&buf[start..]) } } diff --git a/src/reader/mod.rs b/src/reader/mod.rs index b197e065..1f266f64 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -355,8 +355,7 @@ macro_rules! read_until_close { .read_element($buf, &mut $self.state.offset) $(.$await)? { - Ok(None) => Ok(Event::Eof), - Ok(Some(bytes)) => $self.state.emit_start(bytes), + Ok(bytes) => $self.state.emit_start(bytes), Err(e) => Err(e), }, // `<` - syntax error, tag not closed @@ -798,8 +797,8 @@ trait XmlSource<'r, B> { ) -> Result>; /// Read input until XML element is closed by approaching a `>` symbol. - /// Returns `Some(buffer)` that contains a data between `<` and `>` or - /// `None` if end-of-input was reached and nothing was read. + /// Returns a buffer that contains a data between `<` and `>` or + /// [`SyntaxError::UnclosedTag`] if end-of-input was reached before reading `>`. /// /// Derived from `read_until`, but modified to handle XML attributes /// using a minimal state machine. @@ -819,7 +818,7 @@ trait XmlSource<'r, B> { /// /// [defined]: https://www.w3.org/TR/xml11/#NT-AttValue /// [events]: crate::events::Event - fn read_element(&mut self, buf: B, position: &mut usize) -> Result>; + fn read_element(&mut self, buf: B, position: &mut usize) -> Result<&'r [u8]>; /// Consume and discard all the whitespace until the next non-whitespace /// character or EOF. @@ -1487,6 +1486,7 @@ mod test { mod read_element { use super::*; + use crate::errors::{Error, SyntaxError}; use crate::utils::Bytes; use pretty_assertions::assert_eq; @@ -1498,16 +1498,18 @@ mod test { let mut input = b"".as_ref(); // ^= 0 - assert_eq!( - $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes), - None - ); + match $source(&mut input).read_element(buf, &mut position) $(.$await)? { + Err(Error::Syntax(SyntaxError::UnclosedTag)) => {} + x => panic!( + "Expected `Err(Syntax(UnclosedTag))`, but got `{:?}`", + x + ), + } assert_eq!(position, 0); } mod open { use super::*; - use crate::utils::Bytes; use pretty_assertions::assert_eq; #[$test] @@ -1518,8 +1520,8 @@ mod test { // ^= 1 assert_eq!( - $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes), - Some(Bytes(b"")) + Bytes($source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap()), + Bytes(b"") ); assert_eq!(position, 1); } @@ -1532,8 +1534,8 @@ mod test { // ^= 4 assert_eq!( - $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes), - Some(Bytes(b"tag")) + Bytes($source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap()), + Bytes(b"tag") ); assert_eq!(position, 4); } @@ -1546,8 +1548,8 @@ mod test { // ^= 2 assert_eq!( - $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes), - Some(Bytes(b":")) + Bytes($source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap()), + Bytes(b":") ); assert_eq!(position, 2); } @@ -1560,8 +1562,8 @@ mod test { // ^= 5 assert_eq!( - $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes), - Some(Bytes(b":tag")) + Bytes($source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap()), + Bytes(b":tag") ); assert_eq!(position, 5); } @@ -1574,8 +1576,8 @@ mod test { // ^= 38 assert_eq!( - $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes), - Some(Bytes(br#"tag attr-1=">" attr2 = '>' 3attr"#)) + Bytes($source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap()), + Bytes(br#"tag attr-1=">" attr2 = '>' 3attr"#) ); assert_eq!(position, 38); } @@ -1583,7 +1585,6 @@ mod test { mod self_closed { use super::*; - use crate::utils::Bytes; use pretty_assertions::assert_eq; #[$test] @@ -1594,8 +1595,8 @@ mod test { // ^= 2 assert_eq!( - $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes), - Some(Bytes(b"/")) + Bytes($source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap()), + Bytes(b"/") ); assert_eq!(position, 2); } @@ -1608,8 +1609,8 @@ mod test { // ^= 5 assert_eq!( - $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes), - Some(Bytes(b"tag/")) + Bytes($source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap()), + Bytes(b"tag/") ); assert_eq!(position, 5); } @@ -1622,8 +1623,8 @@ mod test { // ^= 3 assert_eq!( - $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes), - Some(Bytes(b":/")) + Bytes($source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap()), + Bytes(b":/") ); assert_eq!(position, 3); } @@ -1636,8 +1637,8 @@ mod test { // ^= 6 assert_eq!( - $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes), - Some(Bytes(b":tag/")) + Bytes($source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap()), + Bytes(b":tag/") ); assert_eq!(position, 6); } @@ -1650,8 +1651,8 @@ mod test { // ^= 41 assert_eq!( - $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes), - Some(Bytes(br#"tag attr-1="/>" attr2 = '/>' 3attr/"#)) + Bytes($source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap()), + Bytes(br#"tag attr-1="/>" attr2 = '/>' 3attr/"#) ); assert_eq!(position, 41); } diff --git a/src/reader/slice_reader.rs b/src/reader/slice_reader.rs index 976f709f..d5a8c846 100644 --- a/src/reader/slice_reader.rs +++ b/src/reader/slice_reader.rs @@ -299,18 +299,14 @@ impl<'a> XmlSource<'a, ()> for &'a [u8] { Err(bang_type.to_err()) } - fn read_element(&mut self, _buf: (), position: &mut usize) -> Result> { - if self.is_empty() { - return Ok(None); - } - + fn read_element(&mut self, _buf: (), position: &mut usize) -> Result<&'a [u8]> { let mut state = ReadElementState::Elem; if let Some((bytes, i)) = state.change(self) { // Position now just after the `>` symbol *position += i; *self = &self[i..]; - return Ok(Some(bytes)); + return Ok(bytes); } // Note: Do not update position, so the error points to a sane place From 34e8d500ae3493b54fb0c56f736300b602ae4553 Mon Sep 17 00:00:00 2001 From: Mingun Date: Sun, 12 Nov 2023 01:17:18 +0500 Subject: [PATCH 5/6] Fix return type of XmlSource::read_bang_element - it shouldn't return None, because we inside a markup An opened `<` character was already read when we call read_bang_element, so we MUST to find `>` or return a syntax error --- src/reader/buffered_reader.rs | 6 +-- src/reader/mod.rs | 81 ++++++++++++++++------------------- src/reader/slice_reader.rs | 4 +- 3 files changed, 43 insertions(+), 48 deletions(-) diff --git a/src/reader/buffered_reader.rs b/src/reader/buffered_reader.rs index a6f42121..07f3773f 100644 --- a/src/reader/buffered_reader.rs +++ b/src/reader/buffered_reader.rs @@ -97,7 +97,7 @@ macro_rules! impl_buffered_source { &mut self, buf: &'b mut Vec, position: &mut usize, - ) -> Result> { + ) -> Result<(BangType, &'b [u8])> { // Peeked one bang ('!') before being called, so it's guaranteed to // start with it. let start = buf.len(); @@ -140,9 +140,9 @@ macro_rules! impl_buffered_source { } if read == 0 { - Ok(None) + Err(bang_type.to_err()) } else { - Ok(Some((bang_type, &buf[start..]))) + Ok((bang_type, &buf[start..])) } } diff --git a/src/reader/mod.rs b/src/reader/mod.rs index 1f266f64..560b1dc8 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -328,8 +328,7 @@ macro_rules! read_until_close { .read_bang_element($buf, &mut $self.state.offset) $(.$await)? { - Ok(None) => Ok(Event::Eof), - Ok(Some((bang_type, bytes))) => $self.state.emit_bang(bang_type, bytes), + Ok((bang_type, bytes)) => $self.state.emit_bang(bang_type, bytes), Err(e) => Err(e), }, // ` { /// - `position`: Will be increased by amount of bytes consumed /// /// [events]: crate::events::Event - fn read_bang_element( - &mut self, - buf: B, - position: &mut usize, - ) -> Result>; + fn read_bang_element(&mut self, buf: B, position: &mut usize) -> Result<(BangType, &'r [u8])>; /// Read input until XML element is closed by approaching a `>` symbol. /// Returns a buffer that contains a data between `<` and `>` or @@ -1154,13 +1149,13 @@ mod test { let mut input = b"![CDATA[]]>other content".as_ref(); // ^= 11 + let (ty, bytes) = $source(&mut input) + .read_bang_element(buf, &mut position) + $(.$await)? + .unwrap(); assert_eq!( - $source(&mut input) - .read_bang_element(buf, &mut position) - $(.$await)? - .unwrap() - .map(|(ty, data)| (ty, Bytes(data))), - Some((BangType::CData, Bytes(b"![CDATA[]]"))) + (ty, Bytes(bytes)), + (BangType::CData, Bytes(b"![CDATA[]]")) ); assert_eq!(position, 11); } @@ -1175,13 +1170,13 @@ mod test { let mut input = b"![CDATA[cdata]] ]>content]]>other content]]>".as_ref(); // ^= 28 + let (ty, bytes) = $source(&mut input) + .read_bang_element(buf, &mut position) + $(.$await)? + .unwrap(); assert_eq!( - $source(&mut input) - .read_bang_element(buf, &mut position) - $(.$await)? - .unwrap() - .map(|(ty, data)| (ty, Bytes(data))), - Some((BangType::CData, Bytes(b"![CDATA[cdata]] ]>content]]"))) + (ty, Bytes(bytes)), + (BangType::CData, Bytes(b"![CDATA[cdata]] ]>content]]")) ); assert_eq!(position, 28); } @@ -1300,13 +1295,13 @@ mod test { let mut input = b"!---->other content".as_ref(); // ^= 6 + let (ty, bytes) = $source(&mut input) + .read_bang_element(buf, &mut position) + $(.$await)? + .unwrap(); assert_eq!( - $source(&mut input) - .read_bang_element(buf, &mut position) - $(.$await)? - .unwrap() - .map(|(ty, data)| (ty, Bytes(data))), - Some((BangType::Comment, Bytes(b"!----"))) + (ty, Bytes(bytes)), + (BangType::Comment, Bytes(b"!----")) ); assert_eq!(position, 6); } @@ -1318,13 +1313,13 @@ mod test { let mut input = b"!--->comment<--->other content".as_ref(); // ^= 17 + let (ty, bytes) = $source(&mut input) + .read_bang_element(buf, &mut position) + $(.$await)? + .unwrap(); assert_eq!( - $source(&mut input) - .read_bang_element(buf, &mut position) - $(.$await)? - .unwrap() - .map(|(ty, data)| (ty, Bytes(data))), - Some((BangType::Comment, Bytes(b"!--->comment<---"))) + (ty, Bytes(bytes)), + (BangType::Comment, Bytes(b"!--->comment<---")) ); assert_eq!(position, 17); } @@ -1379,13 +1374,13 @@ mod test { let mut input = b"!DOCTYPE>other content".as_ref(); // ^= 9 + let (ty, bytes) = $source(&mut input) + .read_bang_element(buf, &mut position) + $(.$await)? + .unwrap(); assert_eq!( - $source(&mut input) - .read_bang_element(buf, &mut position) - $(.$await)? - .unwrap() - .map(|(ty, data)| (ty, Bytes(data))), - Some((BangType::DocType, Bytes(b"!DOCTYPE"))) + (ty, Bytes(bytes)), + (BangType::DocType, Bytes(b"!DOCTYPE")) ); assert_eq!(position, 9); } @@ -1453,13 +1448,13 @@ mod test { let mut input = b"!doctype>other content".as_ref(); // ^= 9 + let (ty, bytes) = $source(&mut input) + .read_bang_element(buf, &mut position) + $(.$await)? + .unwrap(); assert_eq!( - $source(&mut input) - .read_bang_element(buf, &mut position) - $(.$await)? - .unwrap() - .map(|(ty, data)| (ty, Bytes(data))), - Some((BangType::DocType, Bytes(b"!doctype"))) + (ty, Bytes(bytes)), + (BangType::DocType, Bytes(b"!doctype")) ); assert_eq!(position, 9); } diff --git a/src/reader/slice_reader.rs b/src/reader/slice_reader.rs index d5a8c846..10421309 100644 --- a/src/reader/slice_reader.rs +++ b/src/reader/slice_reader.rs @@ -281,7 +281,7 @@ impl<'a> XmlSource<'a, ()> for &'a [u8] { &mut self, _buf: (), position: &mut usize, - ) -> Result> { + ) -> Result<(BangType, &'a [u8])> { // Peeked one bang ('!') before being called, so it's guaranteed to // start with it. debug_assert_eq!(self[0], b'!'); @@ -291,7 +291,7 @@ impl<'a> XmlSource<'a, ()> for &'a [u8] { if let Some((bytes, i)) = bang_type.parse(&[], self) { *position += i; *self = &self[i..]; - return Ok(Some((bang_type, bytes))); + return Ok((bang_type, bytes)); } // Note: Do not update position, so the error points to From 8d7913f257569a010236f20657192f19491b37b8 Mon Sep 17 00:00:00 2001 From: Mingun Date: Sun, 12 Nov 2023 01:37:26 +0500 Subject: [PATCH 6/6] Simplify code and fix dead code in read_bang_element `read` in read_bang_element was at least 1, but then branch in any case could be reachable only if .read_buf() returns empty array, but in that case we have already exited with the correct error --- src/reader/buffered_reader.rs | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/src/reader/buffered_reader.rs b/src/reader/buffered_reader.rs index 07f3773f..755a710a 100644 --- a/src/reader/buffered_reader.rs +++ b/src/reader/buffered_reader.rs @@ -111,7 +111,7 @@ macro_rules! impl_buffered_source { match self $(.$reader)? .fill_buf() $(.$await)? { // Note: Do not update position, so the error points to // somewhere sane rather than at the EOF - Ok(n) if n.is_empty() => return Err(bang_type.to_err()), + Ok(n) if n.is_empty() => break, Ok(available) => { // We only parse from start because we don't want to consider // whatever is in the buffer before the bang element @@ -122,7 +122,7 @@ macro_rules! impl_buffered_source { read += used; *position += read; - break; + return Ok((bang_type, &buf[start..])); } else { buf.extend_from_slice(available); @@ -139,11 +139,7 @@ macro_rules! impl_buffered_source { } } - if read == 0 { - Err(bang_type.to_err()) - } else { - Ok((bang_type, &buf[start..])) - } + Err(bang_type.to_err()) } #[inline] @@ -168,7 +164,7 @@ macro_rules! impl_buffered_source { // Position now just after the `>` symbol *position += read; - break; + return Ok(&buf[start..]); } else { // The `>` symbol not yet found, continue reading buf.extend_from_slice(available); @@ -186,11 +182,7 @@ macro_rules! impl_buffered_source { }; } - if read == 0 { - Err(Error::Syntax(SyntaxError::UnclosedTag)) - } else { - Ok(&buf[start..]) - } + Err(Error::Syntax(SyntaxError::UnclosedTag)) } $($async)? fn skip_whitespace(&mut self, position: &mut usize) -> Result<()> {