Skip to content

Commit

Permalink
Remove reader.decode() in favor of reader.decoder().decode()
Browse files Browse the repository at this point in the history
Holding reference to a reader prevents some usages in the future

Co-authored-by: Daniel Alley <dalley@redhat.com>
  • Loading branch information
Mingun and dralley committed Jun 19, 2022
1 parent 24fac69 commit c8236e4
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 46 deletions.
3 changes: 3 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@
use `String::from_utf8` instead (which that function did)
- [#191]: Remove `*_without_bom` methods from the `Attributes` struct because they are useless.
Use the same-named methods without that suffix instead. Attribute values cannot contain BOM
- [#191]: Remove `Reader::decode()`; it is replaced by `Decoder::decode()`.
Use `reader.decoder().decode(...)` instead of `reader.decode(...)` for now.
`Reader::encoding()` is replaced by `Decoder::encoding()` as well.

### New Tests

Expand Down
4 changes: 2 additions & 2 deletions src/events/attributes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ impl<'a> Attribute<'a> {
reader: &Reader<B>,
custom_entities: Option<&HashMap<Vec<u8>, Vec<u8>>>,
) -> XmlResult<String> {
let decoded = reader.decode(&*self.value);
let decoded = reader.decoder().decode(&*self.value);
let unescaped =
do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?;
String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error()))
Expand All @@ -126,7 +126,7 @@ impl<'a> Attribute<'a> {
reader: &Reader<B>,
custom_entities: Option<&HashMap<Vec<u8>, Vec<u8>>>,
) -> XmlResult<String> {
let decoded = reader.decode(&*self.value)?;
let decoded = reader.decoder().decode(&*self.value)?;
let unescaped =
do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?;
String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error()))
Expand Down
8 changes: 4 additions & 4 deletions src/events/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ impl<'a> BytesStart<'a> {
reader: &Reader<B>,
custom_entities: Option<&HashMap<Vec<u8>, Vec<u8>>>,
) -> Result<String> {
let decoded = reader.decode(&*self);
let decoded = reader.decoder().decode(&*self);
let unescaped =
do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?;
String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error()))
Expand All @@ -274,7 +274,7 @@ impl<'a> BytesStart<'a> {
reader: &Reader<B>,
custom_entities: Option<&HashMap<Vec<u8>, Vec<u8>>>,
) -> Result<String> {
let decoded = reader.decode(&*self)?;
let decoded = reader.decoder().decode(&*self)?;
let unescaped =
do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?;
String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error()))
Expand Down Expand Up @@ -928,7 +928,7 @@ impl<'a> BytesText<'a> {
reader: &Reader<B>,
custom_entities: Option<&HashMap<Vec<u8>, Vec<u8>>>,
) -> Result<String> {
let decoded = reader.decode(&*self);
let decoded = reader.decoder().decode(&*self);
let unescaped =
do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?;
String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error()))
Expand All @@ -940,7 +940,7 @@ impl<'a> BytesText<'a> {
reader: &Reader<B>,
custom_entities: Option<&HashMap<Vec<u8>, Vec<u8>>>,
) -> Result<String> {
let decoded = reader.decode(&*self)?;
let decoded = reader.decoder().decode(&*self)?;
let unescaped =
do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?;
String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error()))
Expand Down
63 changes: 25 additions & 38 deletions src/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -369,52 +369,20 @@ impl<R: BufRead> Reader<R> {
self.ns_resolver.resolve(name, namespace_buffer, false)
}

/// Returns the `Reader`s encoding.
///
/// The used encoding may change after parsing the XML declaration.
/// Get the decoder, used to decode bytes, read by this reader, to the strings.
///
/// This encoding will be used by [`decode`].
/// If `encoding` feature is enabled, the used encoding may change after
/// parsing the XML declaration, otherwise encoding is fixed to UTF-8.
///
/// [`decode`]: #method.decode
#[cfg(feature = "encoding")]
pub fn encoding(&self) -> &'static Encoding {
self.encoding
}

/// Get the decoder, used to decode bytes, read by this reader, to the strings.
/// If `encoding` feature is enabled and no encoding is specified in declaration,
/// defaults to UTF-8.
pub fn decoder(&self) -> Decoder {
// The returned `Decoder` is a fresh value, not a borrow of the reader.
// The `encoding` field is only initialized when the `encoding` feature is
// compiled in; in that case it takes the reader's current `encoding` value.
Decoder {
#[cfg(feature = "encoding")]
encoding: self.encoding,
}
}

/// Decodes a slice using the encoding specified in the XML declaration.
///
/// Decode `bytes` with BOM sniffing and with malformed sequences replaced with the
/// `U+FFFD REPLACEMENT CHARACTER`.
///
/// If no encoding is specified, defaults to UTF-8.
#[inline]
#[cfg(feature = "encoding")]
pub fn decode<'c>(&self, bytes: &'c [u8]) -> Cow<'c, str> {
self.encoding.decode(bytes).0
}

/// Decodes a UTF8 slice regardless of XML declaration.
///
/// Decode `bytes` with BOM sniffing and with malformed sequences replaced with the
/// `U+FFFD REPLACEMENT CHARACTER`.
///
/// # Note
///
/// If you instead want to use XML declared encoding, use the `encoding` feature
#[inline]
#[cfg(not(feature = "encoding"))]
pub fn decode<'c>(&self, bytes: &'c [u8]) -> Result<&'c str> {
from_utf8(bytes).map_err(Error::Utf8)
}

/// Decodes a slice using without BOM (Byte order mark) the encoding specified in the XML declaration.
///
/// Decode `bytes` without BOM and with malformed sequences replaced with the
Expand Down Expand Up @@ -1490,16 +1458,35 @@ pub struct Decoder {

#[cfg(not(feature = "encoding"))]
impl Decoder {
/// Decodes specified bytes using UTF-8 encoding
/// Decodes a UTF-8 slice regardless of the XML declaration.
///
/// The bytes are validated with `from_utf8` and the result borrows from `bytes`;
/// this method does not replace malformed sequences. If `bytes` is not valid
/// UTF-8, the validation error is returned wrapped in `Error::Utf8`.
///
/// # Note
///
/// If you instead want to use XML declared encoding, use the `encoding` feature
pub fn decode<'c>(&self, bytes: &'c [u8]) -> Result<&'c str> {
from_utf8(bytes).map_err(Error::Utf8)
}
}

#[cfg(feature = "encoding")]
impl Decoder {
/// Returns the encoding held by this `Decoder`.
///
/// This encoding will be used by [`decode`].
///
/// [`decode`]: Self::decode
pub fn encoding(&self) -> &'static Encoding {
self.encoding
}

/// Decodes the specified bytes using the encoding declared in the XML,
/// or UTF-8 if no encoding was declared there.
///
/// Decode `bytes` with BOM sniffing and with malformed sequences replaced with the
/// `U+FFFD REPLACEMENT CHARACTER`.
pub fn decode<'c>(&self, bytes: &'c [u8]) -> Cow<'c, str> {
// `decode` on the encoding returns a tuple; only its first element
// (the decoded text) is kept, the rest is discarded.
self.encoding.decode(bytes).0
}
Expand Down
4 changes: 2 additions & 2 deletions tests/xmlrs_reader_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -451,10 +451,10 @@ fn make_attrs(e: &BytesStart) -> ::std::result::Result<String, String> {
// Test helper: decodes `text` through the given reader and always returns a
// `Cow<str>`, whichever way the "encoding" feature is set.
// With the feature enabled the decode result is used as-is; without it the
// result is unwrapped (panicking on a decode error) and wrapped in `Cow::Borrowed`.
// FIXME: The public API differs based on the "encoding" feature
fn decode<'a>(text: &'a [u8], reader: &Reader<&[u8]>) -> Cow<'a, str> {
#[cfg(feature = "encoding")]
let decoded = reader.decode(text);
let decoded = reader.decoder().decode(text);

#[cfg(not(feature = "encoding"))]
let decoded = Cow::Borrowed(reader.decode(text).unwrap());
let decoded = Cow::Borrowed(reader.decoder().decode(text).unwrap());

decoded
}
Expand Down

0 comments on commit c8236e4

Please sign in to comment.