Skip to content

Commit

Permalink
Allow using tokio's AsyncBufRead
Browse files Browse the repository at this point in the history
  • Loading branch information
endor committed Sep 24, 2020
1 parent f69b2f8 commit ae18715
Show file tree
Hide file tree
Showing 19 changed files with 1,977 additions and 431 deletions.
5 changes: 5 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ name = "quick-xml"
version = "0.18.1"
authors = ["Johann Tuffe <[email protected]>"]
description = "High performance xml reader and writer"
edition = "2018"

documentation = "https://docs.rs/quick-xml"
repository = "https://github.com/tafia/quick-xml"
Expand All @@ -16,13 +17,16 @@ license = "MIT"
travis-ci = { repository = "tafia/quick-xml" }

[dependencies]
async-recursion = { version = "0.3.1", optional = true }
encoding_rs = { version = "0.8.22", optional = true }
tokio = { version = "0.2.22", features = ["fs", "io-util"], optional = true }
serde = { version = "1.0", optional = true }
memchr = "2.3.3"

[dev-dependencies]
serde = { version = "1.0", features = ["derive"] }
regex = "1"
tokio = { version = "0.2.22", features = ["macros", "rt-threaded"] }

[lib]
bench = false
Expand All @@ -31,6 +35,7 @@ bench = false
default = []
encoding = ["encoding_rs"]
serialize = ["serde"]
asynchronous = ["tokio", "async-recursion"]

[package.metadata.docs.rs]
features = ["serialize"]
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -210,8 +210,8 @@ fn crates_io() -> Result<Html, DeError> {

### Credits

This has largely been inspired by [serde-xml-rs](https://github.com/RReverser/serde-xml-rs).
quick-xml follows its convention for deserialization, including the
This has largely been inspired by [serde-xml-rs](https://github.com/RReverser/serde-xml-rs).
quick-xml follows its convention for deserialization, including the
[`$value`](https://github.com/RReverser/serde-xml-rs#parsing-the-value-of-a-tag) special name.

### Parsing the "value" of a tag
Expand All @@ -234,6 +234,7 @@ Note that despite not focusing on performance (there are several unnecessary copi

- `encoding`: support non-UTF-8 XML documents
- `serialize`: support serde `Serialize`/`Deserialize`
- `asynchronous`: support reading from `tokio`'s `AsyncBufRead` sources

## Performance

Expand Down
16 changes: 13 additions & 3 deletions examples/issue68.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
#![allow(unused)]

extern crate quick_xml;

use quick_xml::events::Event;
use quick_xml::Reader;
use std::io::Read;
#[cfg(feature = "asynchronous")]
use tokio::runtime::Runtime;

struct Resource {
etag: String,
Expand Down Expand Up @@ -81,8 +81,18 @@ fn parse_report(xml_data: &str) -> Vec<Resource> {
let mut depth = 0;
let mut state = State::MultiStatus;

#[cfg(feature = "asynchronous")]
let mut runtime = Runtime::new().expect("Runtime cannot be initialized");

loop {
match reader.read_namespaced_event(&mut buf, &mut ns_buffer) {
#[cfg(feature = "asynchronous")]
let event = runtime
.block_on(async { reader.read_namespaced_event(&mut buf, &mut ns_buffer).await });

#[cfg(not(feature = "asynchronous"))]
let event = reader.read_namespaced_event(&mut buf, &mut ns_buffer);

match event {
Ok((namespace_value, Event::Start(e))) => {
let namespace_value = namespace_value.unwrap_or_default();
match (depth, state, namespace_value, e.local_name()) {
Expand Down
32 changes: 29 additions & 3 deletions examples/nested_readers.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
extern crate quick_xml;
use quick_xml::events::Event;
use quick_xml::Reader;
#[cfg(feature = "asynchronous")]
use tokio::runtime::Runtime;

// a structure to capture the rows we've extracted
// from a ECMA-376 table in document.xml
#[derive(Debug, Clone)]
Expand All @@ -16,10 +18,26 @@ fn main() -> Result<(), quick_xml::Error> {
// buffer for nested reader
let mut skip_buf = Vec::new();
let mut count = 0;

#[cfg(feature = "asynchronous")]
let mut runtime = Runtime::new().expect("Runtime cannot be initialized");

#[cfg(feature = "asynchronous")]
let mut reader =
runtime.block_on(async { Reader::from_file("tests/documents/document.xml").await })?;

#[cfg(not(feature = "asynchronous"))]
let mut reader = Reader::from_file("tests/documents/document.xml")?;

let mut found_tables = Vec::new();
loop {
match reader.read_event(&mut buf)? {
#[cfg(feature = "asynchronous")]
let event = runtime.block_on(async { reader.read_event(&mut buf).await })?;

#[cfg(not(feature = "asynchronous"))]
let event = reader.read_event(&mut buf)?;

match event {
Event::Start(element) => match element.name() {
b"w:tbl" => {
count += 1;
Expand All @@ -32,7 +50,15 @@ fn main() -> Result<(), quick_xml::Error> {
let mut row_index = 0;
loop {
skip_buf.clear();
match reader.read_event(&mut skip_buf)? {

#[cfg(feature = "asynchronous")]
let event =
runtime.block_on(async { reader.read_event(&mut skip_buf).await })?;

#[cfg(not(feature = "asynchronous"))]
let event = reader.read_event(&mut skip_buf)?;

match event {
Event::Start(element) => match element.name() {
b"w:tr" => {
stats.rows.push(vec![]);
Expand Down
29 changes: 24 additions & 5 deletions examples/read_texts.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
extern crate quick_xml;
#[cfg(feature = "asynchronous")]
use tokio::runtime::Runtime;

fn main() {
use quick_xml::events::Event;
Expand All @@ -13,14 +14,32 @@ fn main() {
let mut txt = Vec::new();
let mut buf = Vec::new();

#[cfg(feature = "asynchronous")]
let mut runtime = Runtime::new().expect("Runtime cannot be initialized");

loop {
match reader.read_event(&mut buf) {
#[cfg(feature = "asynchronous")]
let event = runtime.block_on(async { reader.read_event(&mut buf).await });

#[cfg(not(feature = "asynchronous"))]
let event = reader.read_event(&mut buf);

match event {
Ok(Event::Start(ref e)) if e.name() == b"tag2" => {
txt.push(
#[cfg(feature = "asynchronous")]
let text = runtime.block_on(async {
reader
.read_text(b"tag2", &mut Vec::new())
.expect("Cannot decode text value"),
);
.await
.expect("Cannot decode text value")
});

#[cfg(not(feature = "asynchronous"))]
let text = reader
.read_text(b"tag2", &mut Vec::new())
.expect("Cannot decode text value");

txt.push(text);
println!("{:?}", txt);
}
Ok(Event::Eof) => break, // exits the loop when reaching end of file
Expand Down
2 changes: 1 addition & 1 deletion src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ pub enum Error {
/// Duplicate attribute
DuplicatedAttribute(usize, usize),
/// Escape error
EscapeError(::escape::EscapeError),
EscapeError(crate::escape::EscapeError),
}

impl From<::std::io::Error> for Error {
Expand Down
21 changes: 7 additions & 14 deletions src/events/attributes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,10 @@
//!
//! Provides an iterator over attributes key/value pairs
use errors::{Error, Result};
use escape::{escape, unescape};
use reader::{is_whitespace, Reader};
use crate::errors::{Error, Result};
use crate::escape::{escape, unescape};
use crate::reader::{is_whitespace, Decode};
use std::borrow::Cow;
use std::io::BufRead;
use std::ops::Range;

/// Iterator over XML attributes.
Expand Down Expand Up @@ -107,7 +106,7 @@ impl<'a> Attribute<'a> {
/// [`unescaped_value()`]: #method.unescaped_value
/// [`Reader::decode()`]: ../../reader/struct.Reader.html#method.decode
#[cfg(feature = "encoding")]
pub fn unescape_and_decode_value<B: BufRead>(&self, reader: &Reader<B>) -> Result<String> {
pub fn unescape_and_decode_value(&self, reader: &impl Decode) -> Result<String> {
let decoded = reader.decode(&*self.value);
let unescaped = unescape(decoded.as_bytes()).map_err(Error::EscapeError)?;
String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error()))
Expand All @@ -124,7 +123,7 @@ impl<'a> Attribute<'a> {
/// [`unescaped_value()`]: #method.unescaped_value
/// [`Reader::decode()`]: ../../reader/struct.Reader.html#method.decode
#[cfg(not(feature = "encoding"))]
pub fn unescape_and_decode_value<B: BufRead>(&self, reader: &Reader<B>) -> Result<String> {
pub fn unescape_and_decode_value(&self, reader: &impl Decode) -> Result<String> {
let decoded = reader.decode(&*self.value)?;
let unescaped = unescape(decoded.as_bytes()).map_err(Error::EscapeError)?;
String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error()))
Expand All @@ -138,10 +137,7 @@ impl<'a> Attribute<'a> {
/// 1. BytesText::unescaped()
/// 2. Reader::decode(...)
#[cfg(feature = "encoding")]
pub fn unescape_and_decode_without_bom<B: BufRead>(
&self,
reader: &mut Reader<B>,
) -> Result<String> {
pub fn unescape_and_decode_without_bom(&self, reader: &impl Decode) -> Result<String> {
let decoded = reader.decode_without_bom(&*self.value);
let unescaped = unescape(decoded.as_bytes()).map_err(Error::EscapeError)?;
String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error()))
Expand All @@ -155,10 +151,7 @@ impl<'a> Attribute<'a> {
/// 1. BytesText::unescaped()
/// 2. Reader::decode(...)
#[cfg(not(feature = "encoding"))]
pub fn unescape_and_decode_without_bom<B: BufRead>(
&self,
reader: &Reader<B>,
) -> Result<String> {
pub fn unescape_and_decode_without_bom(&self, reader: &impl Decode) -> Result<String> {
let decoded = reader.decode_without_bom(&*self.value)?;
let unescaped = unescape(decoded.as_bytes()).map_err(Error::EscapeError)?;
String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error()))
Expand Down
46 changes: 28 additions & 18 deletions src/events/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,13 @@ pub mod attributes;
#[cfg(feature = "encoding_rs")]
use encoding_rs::Encoding;
use std::borrow::Cow;
use std::io::BufRead;
use std::ops::Deref;
use std::str::from_utf8;

use self::attributes::{Attribute, Attributes};
use errors::{Error, Result};
use escape::{escape, unescape};
use reader::Reader;
use crate::errors::{Error, Result};
use crate::escape::{escape, unescape};
use crate::reader::Decode;

use memchr;

Expand Down Expand Up @@ -175,7 +174,7 @@ impl<'a> BytesStart<'a> {
/// [`Reader::decode()`]: ../reader/struct.Reader.html#method.decode
#[cfg(feature = "encoding")]
#[inline]
pub fn unescape_and_decode<B: BufRead>(&self, reader: &Reader<B>) -> Result<String> {
pub fn unescape_and_decode(&self, reader: &impl Decode) -> Result<String> {
let decoded = reader.decode(&*self);
let unescaped = unescape(decoded.as_bytes()).map_err(Error::EscapeError)?;
String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error()))
Expand All @@ -193,7 +192,7 @@ impl<'a> BytesStart<'a> {
/// [`Reader::decode()`]: ../reader/struct.Reader.html#method.decode
#[cfg(not(feature = "encoding"))]
#[inline]
pub fn unescape_and_decode<B: BufRead>(&self, reader: &Reader<B>) -> Result<String> {
pub fn unescape_and_decode(&self, reader: &impl Decode) -> Result<String> {
let decoded = reader.decode(&*self)?;
let unescaped = unescape(decoded.as_bytes()).map_err(Error::EscapeError)?;
String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error()))
Expand Down Expand Up @@ -495,10 +494,7 @@ impl<'a> BytesText<'a> {
/// 1. BytesText::unescaped()
/// 2. Reader::decode(...)
#[cfg(feature = "encoding")]
pub fn unescape_and_decode_without_bom<B: BufRead>(
&self,
reader: &mut Reader<B>,
) -> Result<String> {
pub fn unescape_and_decode_without_bom(&self, reader: &mut impl Decode) -> Result<String> {
let decoded = reader.decode_without_bom(&*self);
let unescaped = unescape(decoded.as_bytes()).map_err(Error::EscapeError)?;
String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error()))
Expand All @@ -512,10 +508,7 @@ impl<'a> BytesText<'a> {
/// 1. BytesText::unescaped()
/// 2. Reader::decode(...)
#[cfg(not(feature = "encoding"))]
pub fn unescape_and_decode_without_bom<B: BufRead>(
&self,
reader: &Reader<B>,
) -> Result<String> {
pub fn unescape_and_decode_without_bom(&self, reader: &impl Decode) -> Result<String> {
let decoded = reader.decode_without_bom(&*self)?;
let unescaped = unescape(decoded.as_bytes()).map_err(Error::EscapeError)?;
String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error()))
Expand All @@ -528,7 +521,7 @@ impl<'a> BytesText<'a> {
/// 1. BytesText::unescaped()
/// 2. Reader::decode(...)
#[cfg(feature = "encoding")]
pub fn unescape_and_decode<B: BufRead>(&self, reader: &Reader<B>) -> Result<String> {
pub fn unescape_and_decode(&self, reader: &impl Decode) -> Result<String> {
let decoded = reader.decode(&*self);
let unescaped = unescape(decoded.as_bytes()).map_err(Error::EscapeError)?;
String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error()))
Expand All @@ -541,7 +534,7 @@ impl<'a> BytesText<'a> {
/// 1. BytesText::unescaped()
/// 2. Reader::decode(...)
#[cfg(not(feature = "encoding"))]
pub fn unescape_and_decode<B: BufRead>(&self, reader: &Reader<B>) -> Result<String> {
pub fn unescape_and_decode(&self, reader: &impl Decode) -> Result<String> {
let decoded = reader.decode(&*self)?;
let unescaped = unescape(decoded.as_bytes()).map_err(Error::EscapeError)?;
String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error()))
Expand Down Expand Up @@ -663,6 +656,8 @@ impl<'a> AsRef<Event<'a>> for Event<'a> {
#[cfg(test)]
mod test {
use super::*;
#[cfg(feature = "asynchronous")]
use tokio::runtime::Runtime;

#[test]
fn local_name() {
Expand All @@ -673,11 +668,25 @@ mod test {
<:foo attr='bar'>foobusbar</:foo>
<foo:bus:baz attr='bar'>foobusbar</foo:bus:baz>
"#;
let mut rdr = Reader::from_str(xml);
let mut rdr = crate::Reader::from_str(xml);
let mut buf = Vec::new();
let mut parsed_local_names = Vec::new();

#[cfg(feature = "asynchronous")]
let mut runtime = Runtime::new().expect("Runtime cannot be initialized");

loop {
match rdr.read_event(&mut buf).expect("unable to read xml event") {
#[cfg(feature = "asynchronous")]
let event = runtime.block_on(async {
rdr.read_event(&mut buf)
.await
.expect("unable to read xml event")
});

#[cfg(not(feature = "asynchronous"))]
let event = rdr.read_event(&mut buf).expect("unable to read xml event");

match event {
Event::Start(ref e) => parsed_local_names.push(
from_utf8(e.local_name())
.expect("unable to build str from local_name")
Expand All @@ -692,6 +701,7 @@ mod test {
_ => {}
}
}

assert_eq!(parsed_local_names[0], "bus".to_string());
assert_eq!(parsed_local_names[1], "bus".to_string());
assert_eq!(parsed_local_names[2], "".to_string());
Expand Down
Loading

0 comments on commit ae18715

Please sign in to comment.