Skip to content
This repository has been archived by the owner on Jan 10, 2025. It is now read-only.

Commit

Permalink
Use curl instead of reqwest for remote resources
Browse files Browse the repository at this point in the history
Closes #304
  • Loading branch information
swsnr committed Nov 24, 2024
1 parent 79f1399 commit ed99ea5
Show file tree
Hide file tree
Showing 11 changed files with 206 additions and 22 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,12 @@ Use `cargo release` to create a new release.

## [Unreleased]

### Changed
- `mdcat` now uses curl for remote resources (see [GH-304]).
As a result, mdcat now also supports remote images from FTP, and uses standard curl environment variables for proxy configuration.

[GH-304]: https://github.com/swsnr/mdcat/issues/304

## [2.6.2] – 2024-11-24

### Fixed
Expand Down
46 changes: 45 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 5 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,16 @@ rust-version.workspace = true
anyhow = { version = "1.0.89", default-features = false, features = ["std"] }
clap = { version = "4.5.17", default-features = false, features = ["std", "derive", "help", "usage", "error-context"] }
clap_complete = "4.5.28"
curl = "0.4.47"
human-panic = { version = "2.0.0", default-features = false, features = ["color"] }
mdcat-http-reqwest = { workspace = true }
mime = { workspace = true}
pulldown-cmark = { workspace = true, features = ['simd'] }
pulldown-cmark-mdcat = { workspace = true, default-features = true }
shell-words = { version = "1.1.0", default-features = false, features = ["std"] }
syntect = { workspace = true, features = ["default-syntaxes"] }
tracing = { workspace = true }
tracing-subscriber = { version = "0.3.18", default-features = false, features = ["env-filter", "std", "fmt", "ansi"] }
url = { workspace = true }

[dev-dependencies]
similar-asserts = { workspace = true }
Expand All @@ -47,7 +49,7 @@ pre-release-replacements = [
[workspace]
members = [
"pulldown-cmark-mdcat",
"mdcat-http-reqwest"
"mdcat-http-reqwest",
]

[workspace.package]
Expand All @@ -68,6 +70,7 @@ similar-asserts = "1.6.0"
syntect = { version = "5.2.0", default-features = false, features = ["regex-fancy"] }
tracing = { version = "0.1.40", default-features = false, features = ["attributes"] }
insta = { version = "1.40.0", features = ["glob", "filters"] }
url = "2.5.2"

# Our own crates; pinned to an exact version because we release all of this repo
# in a single version. cargo release takes care of updating these.
Expand Down
12 changes: 2 additions & 10 deletions mdcat.1.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -202,19 +202,11 @@ https_proxy::
HTTPS_PROXY::
all_proxy::
ALL_PROXY::
Proxies for HTTP, HTTPS, or both protocols, to use when fetching images.
+
Each variable provides the proxy for the corresponding protocol as URL, e.g. ``http://proxy.example.com:3128``.
+
The lowercase name takes precedence; note that `$http_proxy` deliberately has no uppercase variant.

no_proxy::
NO_PROXY::
A comma-separated list of host/domain names or IP address not to use a proxy for.
Proxy settings for HTTP requests made by mdcat to retrieve remote resources.
+
IP addresses may include a subnet mask to match whole networks.
The special entry `*` matches all hostnames.
Any other entry is interpreted as domain name, which matches the domain itself and all its subdomains.
mdcat uses curl for its network transfers, hence see `curl(1)` for these variables.

MDCAT_LOG::
Directives to configure output of tracing information.
Expand Down
2 changes: 1 addition & 1 deletion pulldown-cmark-mdcat/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ syntect = { workspace = true, features = ["parsing", "regex-fancy"] }
textwrap = { version = "0.16.1", default-features = false, features = ["unicode-linebreak", "unicode-width"] }
thiserror = { version = "1.0.61", default-features = false }
tracing = { workspace = true }
url = "2.5.2"
url = { workspace = true }
gethostname = "0.5.0"

# Optional for svg support
Expand Down
13 changes: 5 additions & 8 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@ use std::io::{prelude::*, BufWriter};
use std::path::PathBuf;

use anyhow::{Context, Result};
use mdcat_http_reqwest::HttpResourceHandler;
use pulldown_cmark::{Options, Parser};
use pulldown_cmark_mdcat::resources::{
DispatchingResourceHandler, FileResourceHandler, ResourceUrlHandler,
};
use pulldown_cmark_mdcat::{Environment, Settings};
use resources::CurlResourceHandler;
use tracing::{event, instrument, Level};

use args::ResourceAccess;
Expand All @@ -34,6 +34,8 @@ use output::Output;
pub mod args;
/// Output handling for mdcat.
pub mod output;
/// Resource handling for mdcat.
pub mod resources;

/// Default read size limit for resources.
pub static DEFAULT_RESOURCE_READ_LIMIT: u64 = 104_857_600;
Expand Down Expand Up @@ -114,14 +116,9 @@ pub fn create_resource_handler(access: ResourceAccess) -> Result<DispatchingReso
"Remote resource access permitted, creating HTTP client with user agent {}",
user_agent
);
let client = mdcat_http_reqwest::build_default_client()
.user_agent(user_agent)
.build()
let client = CurlResourceHandler::create(DEFAULT_RESOURCE_READ_LIMIT, user_agent)
.with_context(|| "Failed to build HTTP client".to_string())?;
resource_handlers.push(Box::new(HttpResourceHandler::new(
DEFAULT_RESOURCE_READ_LIMIT,
client,
)));
resource_handlers.push(Box::new(client));
}
Ok(DispatchingResourceHandler::new(resource_handlers))
}
3 changes: 3 additions & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ fn main() {
)
);

// Initialize curl for remote resources
curl::init();

// Setup tracing
let filter = EnvFilter::builder()
// Disable all logging by default, to avoid interfering with regular output at all cost.
Expand Down
100 changes: 100 additions & 0 deletions src/resources.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
// Copyright 2018-2020 Sebastian Wiesner <[email protected]>

// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.

use std::{cell::RefCell, time::Duration};

use curl::easy::{Easy2, Handler, WriteError};
use mime::Mime;
use pulldown_cmark_mdcat::{
resources::{filter_schemes, MimeData},
ResourceUrlHandler,
};
use tracing::{event, instrument, Level};
use url::Url;

/// A curl [`Handler`] which accumulates all received data in an in-memory buffer.
///
/// The buffer never grows beyond `read_limit` bytes; a chunk that would exceed the limit
/// is rejected as a whole.
#[derive(Debug, Clone, Default)]
pub struct CollectBuffer {
    /// Maximum number of bytes `buffer` may hold.
    read_limit: u64,
    /// The data collected so far.
    buffer: Vec<u8>,
}

impl Handler for CollectBuffer {
    /// Append `data` to the buffer unless doing so would exceed the read limit.
    ///
    /// Returns the number of bytes consumed: all of `data` if it was stored, or zero if
    /// it was rejected.
    fn write(&mut self, data: &[u8]) -> Result<usize, WriteError> {
        let chunk_len = data.len();
        let total_len: u64 = (self.buffer.len() + chunk_len).try_into().unwrap();
        if total_len <= self.read_limit {
            self.buffer.extend_from_slice(data);
            return Ok(chunk_len);
        }
        // Over the limit: leave the chunk untouched and report that nothing was handled,
        // which makes curl fail the transfer with a write error.
        Ok(0)
    }
}

/// Resource handler for [`pulldown_cmark_mdcat`] which fetches remote resources with [`curl`].
///
/// A single curl handle is kept behind a [`RefCell`] so that it can be used from the
/// `&self` methods of [`ResourceUrlHandler`].
pub struct CurlResourceHandler {
    easy: RefCell<Easy2<CollectBuffer>>,
}

impl CurlResourceHandler {
    /// Create a resource handler with mdcat's default transfer settings.
    ///
    /// `read_limit` is the maximum amount of data to be read from a resource.
    /// `useragent` is the value of the user agent header.
    ///
    /// # Errors
    ///
    /// Fails if curl rejects any of the options set on the handle.
    pub fn create(read_limit: u64, useragent: &str) -> std::io::Result<Self> {
        let collector = CollectBuffer {
            read_limit,
            buffer: Vec::new(),
        };
        let mut handle = Easy2::new(collector);

        // Deliberately tight timeouts: fetching a resource must not stall rendering, and
        // callers already need a fallback for terminals without image support.
        let one_second = Duration::from_secs(1);
        handle.timeout(one_second)?;
        handle.connect_timeout(one_second)?;

        handle.follow_location(true)?;
        handle.fail_on_error(true)?;
        handle.tcp_nodelay(true)?;
        handle.useragent(useragent)?;

        Ok(Self {
            easy: RefCell::new(handle),
        })
    }

    /// Wrap an already configured curl handle in a resource handler.
    pub fn new(easy: Easy2<CollectBuffer>) -> Self {
        let easy = RefCell::new(easy);
        Self { easy }
    }
}

impl ResourceUrlHandler for CurlResourceHandler {
    /// Fetch the resource at `url` with curl and return its data and MIME type.
    ///
    /// The URL is first passed through [`filter_schemes`] with `http`, `https`, `ftp`,
    /// `ftps` and `smb` as permitted schemes.  The MIME type is parsed from the content
    /// type curl reports for the transfer; it is `None` if curl reports none or if it
    /// does not parse as [`Mime`].
    ///
    /// # Errors
    ///
    /// Propagates the error of [`filter_schemes`], and any curl error from setting the
    /// URL, performing the transfer, or querying the content type.
    #[instrument(level = "debug", skip(self), fields(url = %url))]
    fn read_resource(
        &self,
        url: &Url,
    ) -> std::io::Result<pulldown_cmark_mdcat::resources::MimeData> {
        // See https://curl.se/docs/url-syntax.html for all schemes curl supports
        // We omit the more exotic ones :)
        filter_schemes(&["http", "https", "ftp", "ftps", "smb"], url).and_then(|url| {
            let mut easy = self.easy.borrow_mut();
            // The handle and its buffer are reused across requests.  A transfer which
            // failed half-way (timeout, read limit, HTTP error) returns early below and
            // leaves partial data behind, so always start from an empty buffer; otherwise
            // stale bytes would be prepended to this resource and count against its limit.
            easy.get_mut().buffer.clear();
            easy.url(url.as_str())?;
            easy.perform()?;

            let mime_type = easy.content_type()?.and_then(|content_type| {
                event!(
                    Level::DEBUG,
                    "Raw Content-Type of remote resource {}: {:?}",
                    &url,
                    content_type
                );
                content_type.parse::<Mime>().ok()
            });
            // Move the data out instead of cloning it; this leaves an empty buffer behind
            // and avoids copying the whole resource.
            let data = std::mem::take(&mut easy.get_mut().buffer);
            Ok(MimeData { mime_type, data })
        })
    }
}
21 changes: 21 additions & 0 deletions supply-chain/audits.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,20 @@ start = "2023-04-13"
end = "2025-11-04"
notes = "swsnr trusts epage"

[[trusted.curl]]
criteria = "safe-to-run"
user-id = 1 # Alex Crichton (alexcrichton)
start = "2019-03-12"
end = "2025-11-24"
notes = "Trusted by Mozilla and Bytecode Alliance"

[[trusted.curl-sys]]
criteria = "safe-to-run"
user-id = 1 # Alex Crichton (alexcrichton)
start = "2019-03-12"
end = "2025-11-24"
notes = "Trusted by Mozilla and Bytecode Alliance"

[[trusted.errno]]
criteria = "safe-to-run"
user-id = 6825 # Dan Gohman (sunfishcode)
Expand Down Expand Up @@ -122,6 +136,13 @@ start = "2019-03-04"
end = "2025-11-04"
notes = "Trusted by Mozilla and Bytecode Alliance"

[[trusted.libz-sys]]
criteria = "safe-to-run"
user-id = 4333
start = "2020-08-14"
end = "2025-11-24"
notes = "Trusted by Mozilla and Bytecode Alliance"

[[trusted.linux-raw-sys]]
criteria = "safe-to-run"
user-id = 6825 # Dan Gohman (sunfishcode)
Expand Down
4 changes: 4 additions & 0 deletions supply-chain/config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,10 @@ criteria = "safe-to-run"
version = "0.2.11"
criteria = "safe-to-run"

[[exemptions.libz-sys]]
version = "1.1.20"
criteria = "safe-to-run"

[[exemptions.litemap]]
version = "0.7.4"
criteria = "safe-to-run"
Expand Down
14 changes: 14 additions & 0 deletions supply-chain/imports.lock
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,20 @@ user-id = 5946
user-login = "jrmuizel"
user-name = "Jeff Muizelaar"

[[publisher.curl]]
version = "0.4.47"
when = "2024-09-30"
user-id = 1
user-login = "alexcrichton"
user-name = "Alex Crichton"

[[publisher.curl-sys]]
version = "0.4.78+curl-8.11.0"
when = "2024-11-06"
user-id = 1
user-login = "alexcrichton"
user-name = "Alex Crichton"

[[publisher.errno]]
version = "0.3.9"
when = "2024-05-08"
Expand Down

0 comments on commit ed99ea5

Please sign in to comment.