Skip to content

Commit

Permalink
Upgrade nom dependency to 7.1.3 (#41)
Browse files Browse the repository at this point in the history
* upgrade nom to 6.2.2

* upgrade nom to 7.1.3

* cargo clippy

* fix formatting and clippy lint

* bump version from 0.3.3 to 0.4.0 (the minor component, which is the breaking one for a 0.x crate) because of Display trait implementation
  • Loading branch information
ahartel authored Nov 27, 2024
1 parent b34caac commit 31235a2
Show file tree
Hide file tree
Showing 12 changed files with 111 additions and 88 deletions.
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "warc"
version = "0.3.3"
version = "0.4.0"
description = "A Rust library for reading and writing WARC files."
readme = "README.md"
repository = "https://github.com/jedireza/warc"
Expand All @@ -12,7 +12,7 @@ edition = "2018"

[dependencies]
chrono = "0.4.11"
nom = "5.1.1"
nom = "7.1.3"
url = "2"
uuid = { version = "0.8.1", features = ["v4"] }

Expand Down
6 changes: 3 additions & 3 deletions examples/read_file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ fn main() -> Result<(), std::io::Error> {
match record {
Err(err) => println!("ERROR: {}\r\n", err),
Ok(record) => {
println!("{}: {}", WarcHeader::RecordID.to_string(), record.warc_id(),);
println!("{}: {}", WarcHeader::Date.to_string(), record.date(),);
println!("");
println!("{}: {}", WarcHeader::RecordID, record.warc_id(),);
println!("{}: {}", WarcHeader::Date, record.date(),);
println!();
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion examples/read_filtered.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ fn main() -> std::io::Result<()> {

let filtered_file_names: Vec<_> = args.map(|s| s.to_string_lossy().to_string()).collect();
if filtered_file_names.is_empty() {
return Err(usage_err!("one or more filtered file names not supplied"))?;
Err(usage_err!("one or more filtered file names not supplied"))?;
}

let mut file = WarcReader::from_path_gzip(warc_name)?;
Expand Down
6 changes: 3 additions & 3 deletions examples/read_gzip.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ fn main() -> Result<(), std::io::Error> {
match record {
Err(err) => println!("ERROR: {}\r\n", err),
Ok(record) => {
println!("{}: {}", WarcHeader::RecordID.to_string(), record.warc_id());
println!("{}: {}", WarcHeader::Date.to_string(), record.date());
println!("");
println!("{}: {}", WarcHeader::RecordID, record.warc_id());
println!("{}: {}", WarcHeader::Date, record.date());
println!();
}
}
}
Expand Down
6 changes: 3 additions & 3 deletions examples/read_raw.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,15 @@ fn main() -> Result<(), std::io::Error> {
Ok((headers, _)) => {
println!(
"{}: {}",
WarcHeader::RecordID.to_string(),
WarcHeader::RecordID,
String::from_utf8_lossy(headers.as_ref().get(&WarcHeader::RecordID).unwrap())
);
println!(
"{}: {}",
WarcHeader::Date.to_string(),
WarcHeader::Date,
String::from_utf8_lossy(headers.as_ref().get(&WarcHeader::Date).unwrap())
);
println!("");
println!();
}
}
}
Expand Down
4 changes: 2 additions & 2 deletions src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Error::ParseHeaders(_) => write!(f, "Error parsing headers."),
Error::MissingHeader(ref h) => write!(f, "Missing required header: {}", h.to_string()),
Error::MissingHeader(ref h) => write!(f, "Missing required header: {}", h),
Error::MalformedHeader(ref h, ref r) => {
write!(f, "Malformed header: {}: {}", h.to_string(), r)
write!(f, "Malformed header: {}: {}", h, r)
}
Error::ReadData(_) => write!(f, "Error reading data source."),
Error::ReadOverflow => write!(f, "Read further than expected."),
Expand Down
44 changes: 29 additions & 15 deletions src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@ fn version(input: &[u8]) -> IResult<&[u8], &str> {

let version_str = match str::from_utf8(version) {
Err(_) => {
return Err(nom::Err::Error((input, ErrorKind::Verify)));
return Err(nom::Err::Error(nom::error::Error::new(
input,
ErrorKind::Verify,
)));
}
Ok(version) => version,
};
Expand All @@ -23,8 +26,7 @@ fn version(input: &[u8]) -> IResult<&[u8], &str> {
}

fn is_header_token_char(chr: u8) -> bool {
match chr {
0..=31
!matches!(chr, 0..=31
| 128..=255
| b'('
| b')'
Expand All @@ -43,9 +45,7 @@ fn is_header_token_char(chr: u8) -> bool {
| b'{'
| b'}'
| b' '
| b'\\' => false,
_ => true,
}
| b'\\')
}

fn header(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
Expand All @@ -63,6 +63,7 @@ fn header(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {

/// Parse a WARC header block.
// TODO: evaluate the use of `ErrorKind::Verify` here.
#[allow(clippy::type_complexity)]
pub fn headers(input: &[u8]) -> IResult<&[u8], (&str, Vec<(&str, &[u8])>, usize)> {
let (input, version) = version(input)?;
let (input, headers) = many1(header)(input)?;
Expand All @@ -73,22 +74,31 @@ pub fn headers(input: &[u8]) -> IResult<&[u8], (&str, Vec<(&str, &[u8])>, usize)
for header in headers {
let token_str = match str::from_utf8(header.0) {
Err(_) => {
return Err(nom::Err::Error((input, ErrorKind::Verify)));
return Err(nom::Err::Error(nom::error::Error::new(
input,
ErrorKind::Verify,
)));
}
Ok(token) => token,
};

if content_length == None && token_str.to_lowercase() == "content-length" {
if content_length.is_none() && token_str.to_lowercase() == "content-length" {
let value_str = match str::from_utf8(header.1) {
Err(_) => {
return Err(nom::Err::Error((input, ErrorKind::Verify)));
return Err(nom::Err::Error(nom::error::Error::new(
input,
ErrorKind::Verify,
)));
}
Ok(value) => value,
};

match value_str.parse::<usize>() {
Err(_) => {
return Err(nom::Err::Error((input, ErrorKind::Verify)));
return Err(nom::Err::Error(nom::error::Error::new(
input,
ErrorKind::Verify,
)));
}
Ok(len) => {
content_length = Some(len);
Expand All @@ -101,14 +111,15 @@ pub fn headers(input: &[u8]) -> IResult<&[u8], (&str, Vec<(&str, &[u8])>, usize)

// TODO: Technically if we didn't find a `content-length` header, the record is invalid. Should
// we be returning an error here instead?
if content_length == None {
if content_length.is_none() {
content_length = Some(0);
}

Ok((input, (version, warc_headers, content_length.unwrap())))
}

/// Parse an entire WARC record.
#[allow(clippy::type_complexity)]
pub fn record(input: &[u8]) -> IResult<&[u8], (&str, Vec<(&str, &[u8])>, &[u8])> {
let (input, (headers, _)) = tuple((headers, line_ending))(input)?;
let (input, (body, _, _)) = tuple((take(headers.2), line_ending, line_ending))(input)?;
Expand All @@ -125,13 +136,13 @@ mod tests {

#[test]
fn version_parsing() {
assert_eq!(version(&b"WARC/0.0\r\n"[..]), Ok((&b""[..], &"0.0"[..])));
assert_eq!(version(&b"WARC/0.0\r\n"[..]), Ok((&b""[..], "0.0")));

assert_eq!(version(&b"WARC/1.0\r\n"[..]), Ok((&b""[..], &"1.0"[..])));
assert_eq!(version(&b"WARC/1.0\r\n"[..]), Ok((&b""[..], "1.0")));

assert_eq!(
version(&b"WARC/2.0-alpha\r\n"[..]),
Ok((&b""[..], &"2.0-alpha"[..]))
Ok((&b""[..], "2.0-alpha"))
);
}

Expand Down Expand Up @@ -168,7 +179,10 @@ mod tests {

assert_eq!(
headers(&raw_invalid[..]),
Err(Err::Error((&b"\r\n"[..], ErrorKind::Verify)))
Err(Err::Error(nom::error::Error::new(
&b"\r\n"[..],
ErrorKind::Verify
)))
);

let raw = b"\
Expand Down
9 changes: 4 additions & 5 deletions src/record.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,8 @@ mod streaming_trait {
impl<'t, T: Read + 't> Read for StreamingBody<'t, T> {
fn read(&mut self, data: &mut [u8]) -> std::io::Result<usize> {
let max_read = std::cmp::min(data.len(), *self.1 as usize);
self.0.read(&mut data[..max_read as usize]).map(|n| {
self.0.read(&mut data[..max_read]).inspect(|&n| {
*self.1 -= n as u64;
n
})
}
}
Expand Down Expand Up @@ -156,7 +155,7 @@ impl std::fmt::Display for RawRecordHeader {
fn fmt(&self, w: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
writeln!(w, "WARC/{}", self.version)?;
for (key, value) in self.as_ref().iter() {
writeln!(w, "{}: {}", key.to_string(), String::from_utf8_lossy(value))?;
writeln!(w, "{}: {}", key, String::from_utf8_lossy(value))?;
}
writeln!(w)?;

Expand Down Expand Up @@ -263,7 +262,7 @@ impl<T: BodyKind> Record<T> {
/// The current implementation generates random values based on UUID version 4.
///
pub fn generate_record_id() -> String {
format!("<{}>", Uuid::new_v4().to_urn().to_string())
format!("<{}>", Uuid::new_v4().to_urn())
}

fn parse_content_length(len: &str) -> Result<u64, WarcError> {
Expand Down Expand Up @@ -1058,7 +1057,7 @@ mod raw_tests {

let output = headers.to_string();

let expected_lines = vec![
let expected_lines = [
"WARC/1.0",
"warc-type: dunno",
"warc-date: 2024-01-01T00:00:00Z",
Expand Down
8 changes: 5 additions & 3 deletions src/record_type.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
#![allow(missing_docs)]

use std::fmt::Display;
#[derive(Clone, Debug, PartialEq)]
pub enum RecordType {
WarcInfo,
Expand All @@ -12,8 +14,8 @@ pub enum RecordType {
Unknown(String),
}

impl ToString for RecordType {
fn to_string(&self) -> String {
impl Display for RecordType {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let stringified = match *self {
RecordType::WarcInfo => "warcinfo",
RecordType::Response => "response",
Expand All @@ -25,7 +27,7 @@ impl ToString for RecordType {
RecordType::Continuation => "continuation",
RecordType::Unknown(ref val) => val.as_ref(),
};
stringified.to_string()
f.write_str(stringified)
}
}

Expand Down
8 changes: 5 additions & 3 deletions src/truncated_type.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
#![allow(missing_docs)]

use std::fmt::Display;
#[derive(Clone, Debug, PartialEq)]
pub enum TruncatedType {
Length,
Expand All @@ -8,16 +10,16 @@ pub enum TruncatedType {
Unknown(String),
}

impl ToString for TruncatedType {
fn to_string(&self) -> String {
impl Display for TruncatedType {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let stringified = match *self {
TruncatedType::Length => "length",
TruncatedType::Time => "time",
TruncatedType::Disconnect => "disconnect",
TruncatedType::Unspecified => "unspecified",
TruncatedType::Unknown(ref val) => val.as_ref(),
};
stringified.to_string()
f.write_str(stringified)
}
}

Expand Down
Loading

0 comments on commit 31235a2

Please sign in to comment.