Skip to content

Commit

Permalink
Fix #329: Borrow from input in unescape_* methods
Browse files Browse the repository at this point in the history
Because those methods usually used on events returned by reader, which
always borrow content from input / buffer, actual allocation count does
not changed
  • Loading branch information
Mingun committed Aug 14, 2022
1 parent 2bf2d2d commit abec80f
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 15 deletions.
2 changes: 2 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@
|`*_with_custom_entities`|`*_with`
|`BytesText::unescaped()`|`BytesText::unescape()`
|`Attribute::unescaped_*`|`Attribute::unescape_*`
- [#329]: Also, that functions now borrow from the input instead of event / attribute

- [#416]: `BytesStart::to_borrowed` renamed to `BytesStart::borrow`, the same method
added to all events
Expand Down Expand Up @@ -199,6 +200,7 @@
[#180]: https://github.com/tafia/quick-xml/issues/180
[#191]: https://github.com/tafia/quick-xml/issues/191
[#324]: https://github.com/tafia/quick-xml/issues/324
[#329]: https://github.com/tafia/quick-xml/issues/329
[#363]: https://github.com/tafia/quick-xml/issues/363
[#387]: https://github.com/tafia/quick-xml/pull/387
[#391]: https://github.com/tafia/quick-xml/pull/391
Expand Down
29 changes: 20 additions & 9 deletions src/events/attributes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ impl<'a> Attribute<'a> {
///
/// This method is available only if `encoding` feature is **not** enabled.
#[cfg(any(doc, not(feature = "encoding")))]
pub fn unescape_value(&self) -> XmlResult<Cow<str>> {
pub fn unescape_value(&self) -> XmlResult<Cow<'a, str>> {
self.unescape_value_with(|_| None)
}

Expand All @@ -61,19 +61,26 @@ impl<'a> Attribute<'a> {
pub fn unescape_value_with<'entity>(
&self,
resolve_entity: impl Fn(&str) -> Option<&'entity str>,
) -> XmlResult<Cow<str>> {
) -> XmlResult<Cow<'a, str>> {
// from_utf8 should never fail because content is always UTF-8 encoded
Ok(unescape_with(
std::str::from_utf8(&self.value)?,
resolve_entity,
)?)
let decoded = match &self.value {
Cow::Borrowed(bytes) => Cow::Borrowed(std::str::from_utf8(bytes)?),
// Convert to owned, because otherwise Cow will be bound with wrong lifetime
Cow::Owned(bytes) => Cow::Owned(std::str::from_utf8(bytes)?.to_string()),
};

match unescape_with(&decoded, resolve_entity)? {
// Because result is borrowed, no replacements was done and we can use original string
Cow::Borrowed(_) => Ok(decoded),
Cow::Owned(s) => Ok(s.into()),
}
}

/// Decodes then unescapes the value.
///
/// This will allocate if the value contains any escape sequences or in
/// non-UTF-8 encoding.
pub fn decode_and_unescape_value<B>(&self, reader: &Reader<B>) -> XmlResult<Cow<str>> {
pub fn decode_and_unescape_value<B>(&self, reader: &Reader<B>) -> XmlResult<Cow<'a, str>> {
self.decode_and_unescape_value_with(reader, |_| None)
}

Expand All @@ -85,8 +92,12 @@ impl<'a> Attribute<'a> {
&self,
reader: &Reader<B>,
resolve_entity: impl Fn(&str) -> Option<&'entity str>,
) -> XmlResult<Cow<str>> {
let decoded = reader.decoder().decode(&*self.value)?;
) -> XmlResult<Cow<'a, str>> {
let decoded = match &self.value {
Cow::Borrowed(bytes) => reader.decoder().decode(bytes)?,
// Convert to owned, because otherwise Cow will be bound with wrong lifetime
Cow::Owned(bytes) => reader.decoder().decode(bytes)?.into_owned().into(),
};

match unescape_with(&decoded, resolve_entity)? {
// Because result is borrowed, no replacements was done and we can use original string
Expand Down
14 changes: 8 additions & 6 deletions src/events/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -732,7 +732,7 @@ impl<'a> BytesText<'a> {
///
/// This will allocate if the value contains any escape sequences or in
/// non-UTF-8 encoding.
pub fn unescape(&self) -> Result<Cow<str>> {
pub fn unescape(&self) -> Result<Cow<'a, str>> {
self.unescape_with(|_| None)
}

Expand All @@ -743,8 +743,12 @@ impl<'a> BytesText<'a> {
pub fn unescape_with<'entity>(
&self,
resolve_entity: impl Fn(&str) -> Option<&'entity str>,
) -> Result<Cow<str>> {
let decoded = self.decoder.decode(&*self)?;
) -> Result<Cow<'a, str>> {
let decoded = match &self.content {
Cow::Borrowed(bytes) => self.decoder.decode(bytes)?,
// Convert to owned, because otherwise Cow will be bound with wrong lifetime
Cow::Owned(bytes) => self.decoder.decode(bytes)?.into_owned().into(),
};

match unescape_with(&decoded, resolve_entity)? {
// Because result is borrowed, no replacements was done and we can use original string
Expand All @@ -754,11 +758,9 @@ impl<'a> BytesText<'a> {
}

/// Gets content of this text buffer in the specified encoding and optionally
/// unescapes it. Unlike [`Self::unescape`] & Co., the lifetime
/// of the returned `Cow` is bound to the original buffer / input
/// unescapes it.
#[cfg(feature = "serialize")]
pub(crate) fn decode(&self, unescape: bool) -> Result<Cow<'a, str>> {
//TODO: too many copies, can be optimized
let text = match &self.content {
Cow::Borrowed(bytes) => self.decoder.decode(bytes)?,
// Convert to owned, because otherwise Cow will be bound with wrong lifetime
Expand Down

0 comments on commit abec80f

Please sign in to comment.