diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index f63a9998..ba2a1545 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -68,8 +68,6 @@ jobs: include: - target: x86_64-unknown-linux-gnu os: ubuntu-latest - # On Linux link against libmagic, since it's almost always available - cargo_opts: "--features magic" - target: x86_64-unknown-linux-musl os: ubuntu-latest cargo_opts: "--no-default-features --features static" diff --git a/CHANGELOG.md b/CHANGELOG.md index 1fc89f6a..f0f35f34 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,17 +8,17 @@ Use `cargo release` to create a new release. ## [Unreleased] -### Added -- With `--features magic` use `libmagic` to check for SVG data, instead of shelling out to `file` (see [GH-236]). - ### Changed - Update all dependencies. +- No longer sniff mime type from contents to identify SVG images. + Instead rely on the `Content-Type` header for HTTP(S) images and the file extension for local resources (see [GH-239]). ### Fixed - Use `less -r` instead of `less -R` in `mdless` if both `$PAGER` and `$MDCAT_PAGER` are unset (see [GH-238]). [GH-236]: https://github.com/swsnr/mdcat/pull/236 [GH-238]: https://github.com/swsnr/mdcat/issues/238 +[GH-239]: https://github.com/swsnr/mdcat/pull/239 ## [1.0.0] – 2023-01-07 diff --git a/Cargo.lock b/Cargo.lock index 338ae379..3b5402e3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -811,29 +811,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "magic" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87142e3acb1f4daa62eaea96605421a534119d4777a9fb43fb2784798fd89665" -dependencies = [ - "bitflags", - "errno", - "libc", - "magic-sys", - "thiserror", -] - -[[package]] -name = "magic-sys" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eff86ae08895140d628119d407d568f3b657145ee8c265878064f717534bb3bc" -dependencies = [ - "libc", - "vcpkg", -] - [[package]] name = "matchers" version = "0.1.0" @@ -858,8 +835,8 @@ dependencies = [ "image", "lazy_static", "libc", - "magic", "mime", + "mime_guess", "once_cell", "pretty_assertions", "pulldown-cmark", @@ -894,6 +871,16 @@ version = "0.3.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2a60c7ce501c71e03a9c9c0d35b861413ae925bd979cc7a4e30d060069aaac8d" +[[package]] +name = "mime_guess" +version = "2.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4192263c238a5f0d0c6bfd21f336a313a4ce1c450542449ca191bb657b4642ef" +dependencies = [ + "mime", + "unicase", +] + [[package]] name = "miniz_oxide" version = "0.6.2" diff --git a/Cargo.toml b/Cargo.toml index 3bf33202..90fe975a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,6 +28,7 @@ env_proxy = "0.4.1" gethostname = "0.4.1" image = "0.24.5" mime = "0.3.16" +mime_guess = "2.0.4" pulldown-cmark = { version = "0.9.2", default-features = false, features = ['simd'] } shell-words = "1.1.0" tracing = { version = "0.1.37", features = ["release_max_level_warn"] } @@ -35,7 +36,6 @@ tracing-subscriber = { version = "0.3.16", features = ["env-filter"] } url = "2.3.1" reqwest = { version = "0.11.14", default-features = false, features = ["gzip", "brotli", "blocking"]} once_cell = "1.17.0" -magic = { version = "0.13.0", optional = true } [dependencies.syntect] version = "5.0.0" diff --git a/README.md b/README.md index 0a5de3f1..acb0f3ee 100644 --- a/README.md +++ b/README.md @@ -72,6 +72,12 @@ Try `mdcat --help` or read the [mdcat(1)](./mdcat.1.adoc) manpage. * [Chocolatey]: `choco install mdcat` * You can also build `mdcat` manually with `cargo install mdcat`. +[Homebrew]: https://brew.sh +[MacPorts]: https://www.macports.org +[Arch Linux]: https://www.archlinux.org/packages/community/x86_64/mdcat/ +[scoop]: https://github.com/lukesampson/scoop +[chocolatey]: https://github.com/chocolatey + ## Building Run `cargo build --release`. @@ -79,8 +85,6 @@ The resulting `mdcat` executable links against the system's SSL library, i.e. op To build a self-contained executable use `cargo build --features=static`; the resulting executable uses a pure Rust SSL implementation. It still uses the system's CA roots however. -Pass `--features magic` to use `libmagic` instead of the `file` program to check mimetypes. - The build process also generates the following additional files in `$OUT_DIR`: * Completions for Bash, Zsh, Fish, and Powershell, for both `mdcat` and `mdless`, in `completions` sub-directory. @@ -92,16 +96,6 @@ If you package mdcat you may want to include these files too. [AsciiDoctor]: https://asciidoctor.org/ -## Requirements - -If `mdcat` wasn't built with the `magic` feature, image type detection requires the `file` tool with support for `--brief` and `--mime-type` flags to be available in `$PATH`. - -[Homebrew]: https://brew.sh -[MacPorts]: https://www.macports.org -[Arch Linux]: https://www.archlinux.org/packages/community/x86_64/mdcat/ -[scoop]: https://github.com/lukesampson/scoop -[chocolatey]: https://github.com/chocolatey - ## Troubleshooting `mdcat` can output extensive tracing information when asked to. diff --git a/mdcat.1.adoc b/mdcat.1.adoc index a0ccda66..3737afb0 100644 --- a/mdcat.1.adoc +++ b/mdcat.1.adoc @@ -53,10 +53,7 @@ In particular this disables all image support which relies on proprietary escape In iTerm2, Kitty, Terminology and WezTerm mdcat prints inline images. mdcat supports most standard pixel formats by default. -mdcat silently ignores images larger than 100 MiB. - -For some terminals mdcat needs to detect the mimetype. -If `mdcat` was not built with `libmagic` (see output of `--version` to check) it uses the `file --brief --mime-type` for this purpose, and thus requires a compatible `file` utility in `$PATH`. +mdcat silently ignores images larger than 100 MiB, under the assumption that images of that size cannot reasonably be rendered in a terminal. === SVG support @@ -65,6 +62,9 @@ In Terminology mdcat also renders SVG images, using the built-in support of Term In iTerm2, Kitty and WezTerm mdcat requires `rsvg-convert` to render SVG images to pixel graphics before displaying them; if `rsvg-convert` is not found in `$PATH` mdcat does not render SVG images in these terminals. +For local SVG files mdcat relies on the file extension to identify SVG images. +For remote images from HTTP(S) URLs mdcat inspects the `Content-Type` header to identify SVG images. + === HTTP/HTTPS support mdcat fetches images from HTTP(S) URLs for rendering if the underlying terminal supports image rendering; diff --git a/src/lib.rs b/src/lib.rs index 4df8b794..5d93f3dd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -22,7 +22,6 @@ use crate::terminal::capabilities::TerminalCapabilities; use crate::terminal::TerminalSize; use url::Url; -mod magic; mod references; mod resources; mod svg; diff --git a/src/magic.rs b/src/magic.rs deleted file mode 100644 index 9dc0183d..00000000 --- a/src/magic.rs +++ /dev/null @@ -1,201 +0,0 @@ -// Copyright 2018-2020 Sebastian Wiesner - -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at http://mozilla.org/MPL/2.0/. - -//! Magic util functions for detecting image types. - -use anyhow::Result; -use anyhow::{anyhow, Context}; -use mime::Mime; -use tracing::{event, Level}; - -/// Whether the given data is SVG data. -pub fn is_svg(buffer: &[u8]) -> bool { - // If `buffer` contains a valid pixel image it's definitely not an SVG, so - // let's avoid the expensive call to "file" here. - image::guess_format(buffer).is_err() - && get_mimetype_for_buffer(buffer).map_or_else( - |error| { - event!( - Level::WARN, - ?error, - "failed to determine mime type: {}", - error - ); - false - }, - |detected| detected == mime::IMAGE_SVG, - ) -} - -/// Whether the given data is a PNG image. -pub fn is_png(buffer: &[u8]) -> bool { - match image::guess_format(buffer) { - Ok(image::ImageFormat::Png) => true, - Ok(_) => false, - Err(error) => { - event!( - Level::WARN, - ?error, - "failed to guess image format: {}", - error - ); - false - } - } -} - -#[cfg(feature = "magic")] -mod cookie { - use anyhow::{Context, Result}; - use magic::{Cookie, CookieFlags}; - use once_cell::sync::Lazy; - use tracing::{event, Level}; - - fn create() -> Result { - let flags = CookieFlags::MIME_TYPE - | CookieFlags::ERROR - // Disable all the things we don't need for SVG checking - | CookieFlags::NO_CHECK_COMPRESS - | CookieFlags::NO_CHECK_TAR - | CookieFlags::NO_CHECK_APPTYPE - | CookieFlags::NO_CHECK_ELF - | CookieFlags::NO_CHECK_ENCODING - | CookieFlags::NO_CHECK_JSON - | CookieFlags::NO_CHECK_CSV; - let cookie = - Cookie::open(flags).with_context(|| "Failed to create magic cookie".to_string())?; - cookie - .load::<&str>(Default::default()) - .with_context(|| "Failed to load system magic database into cookie".to_string())?; - Ok(cookie) - } - - fn create_or_log() -> Option { - match create() { - Ok(cookie) => Some(cookie), - Err(error) => { - event!( - Level::ERROR, - ?error, - "Failed to initialize magic cookie: {}", - error - ); - None - } - } - } - - thread_local!(pub static DEFAULT: Lazy> = Lazy::new(create_or_log)); -} - -#[cfg(feature = "magic")] -fn get_mimetype_for_buffer(buffer: &[u8]) -> Result { - cookie::DEFAULT.with(|cookie| match cookie.as_ref() { - Some(cookie) => { - let result = cookie - .buffer(buffer) - .with_context(|| "Failed to check mimetype of buffer with magic cookie")?; - result - .parse::() - .with_context(|| format!("Failed to parse mime type from result: {result}")) - } - None => { - event!( - Level::DEBUG, - "Magic cookie not available, falling back to 'file' program" - ); - get_mimetype_for_buffer_with_file(buffer) - } - }) -} - -#[cfg(not(feature = "magic"))] -fn get_mimetype_for_buffer(buffer: &[u8]) -> Result { - get_mimetype_for_buffer_with_file(buffer) -} - -fn get_mimetype_for_buffer_with_file(buffer: &[u8]) -> Result { - use std::io::prelude::*; - use std::io::ErrorKind; - use std::process::*; - - let mut process = Command::new("file") - .arg("--brief") - .arg("--mime-type") - .arg("-") - .stdin(Stdio::piped()) - .stdout(Stdio::piped()) - .stderr(Stdio::piped()) - .spawn() - .with_context(|| "Failed to spawn mime --brief --mime-type")?; - - process - .stdin - .as_mut() - .expect("Forgot to pipe stdin?") - .write_all(buffer) - .or_else(|error| match error.kind() { - ErrorKind::BrokenPipe => Ok(()), - _ => Err(error), - })?; - - let output = process - .wait_with_output() - .with_context(|| "Failed to read output from mime --brief --mime-type")?; - if output.status.success() { - let stdout = std::str::from_utf8(&output.stdout) - .with_context(|| { - format!( - "mime --brief --mime-type returned non-utf8: {:?}", - output.stdout - ) - })? - .trim(); - let detected_type = stdout - .parse::() - .with_context(|| format!("Failed to parse mime type from output: {stdout}"))?; - Ok(detected_type) - } else { - Err(anyhow!( - "file --brief --mime-type failed with status {}: {}", - output.status, - String::from_utf8_lossy(&output.stderr) - )) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn detect_mimetype_of_png_image() { - let data = include_bytes!("../sample/rust-logo-128x128.png"); - assert!(is_png(data)); - } - - #[test] - fn detect_mimetype_of_svg_image() { - let data = include_bytes!("../sample/rust-logo.svg"); - assert!(is_svg(data)); - } - - #[test] - fn detect_mimetype_of_magic_param_bytes_max_length() { - let data = std::iter::repeat(b'\0') - .take(1_048_576) - .collect::>(); - assert!(!is_svg(&data)); - } - - #[test] - fn detect_mimetype_of_larger_than_magic_param_bytes_max_length() { - let data = std::iter::repeat(b'\0') - .take(1_048_576 * 2) - .collect::>(); - assert!(!is_svg(&data)); - } -} diff --git a/src/resources.rs b/src/resources.rs index 6b675a6a..7e7b557a 100644 --- a/src/resources.rs +++ b/src/resources.rs @@ -6,12 +6,17 @@ //! Access to resources referenced from markdown documents. -use anyhow::{anyhow, Context, Result}; -use once_cell::sync::Lazy; -use reqwest::blocking::{Client, ClientBuilder}; use std::fs::File; use std::io::prelude::*; use std::time::Duration; + +use anyhow::{anyhow, Context, Result}; +use mime::Mime; +use once_cell::sync::Lazy; +use reqwest::{ + blocking::{Client, ClientBuilder}, + header::CONTENT_TYPE, +}; use tracing::{event, Level}; use url::Url; @@ -72,7 +77,7 @@ fn is_local(url: &Url) -> bool { /// Read size limit for resources. static RESOURCE_READ_LIMIT: u64 = 104_857_600; -fn fetch_http(url: &Url) -> Result> { +fn fetch_http(url: &Url) -> Result<(Option, Vec)> { let response = CLIENT .as_ref() .with_context(|| "HTTP client not available".to_owned())? @@ -81,6 +86,23 @@ fn fetch_http(url: &Url) -> Result> { .with_context(|| format!("Failed to GET {url}"))? .error_for_status()?; + let content_type = response.headers().get(CONTENT_TYPE); + event!( + Level::DEBUG, + "Raw Content-Type of remote resource {}: {:?}", + &url, + &content_type + ); + let mime_type = content_type + .and_then(|v| v.to_str().ok()) + .and_then(|v| v.parse::().ok()); + event!( + Level::DEBUG, + "Parsed Content-Type of remote resource {}: {:?}", + &url, + &mime_type + ); + match response.content_length() { // The server gave us no content size so read until the end of the stream, but not more than our read limit. None => { @@ -97,7 +119,7 @@ fn fetch_http(url: &Url) -> Result> { "Contents of {url} exceeded {RESOURCE_READ_LIMIT}, rejected", )) } else { - Ok(buffer) + Ok((mime_type, buffer)) } } // If we've got a content-size use it to read exactly as many bytes as the server told us to do (within limits) @@ -114,7 +136,7 @@ fn fetch_http(url: &Url) -> Result> { .read_exact(buffer.as_mut_slice()) .with_context(|| format!("Failed to read from {url}"))?; - Ok(buffer) + Ok((mime_type, buffer)) } } } @@ -130,7 +152,7 @@ fn fetch_http(url: &Url) -> Result> { /// /// We currently support `file:` URLs which the underlying operation system can /// read (local on UNIX, UNC paths on Windows), and HTTP(S) URLs. -pub fn read_url(url: &Url, access: ResourceAccess) -> Result> { +pub fn read_url(url: &Url, access: ResourceAccess) -> Result<(Option, Vec)> { if !access.permits(url) { return Err(anyhow!( "Access denied to URL {} by policy {:?}", @@ -142,7 +164,7 @@ pub fn read_url(url: &Url, access: ResourceAccess) -> Result> { "file" => match url.to_file_path() { Ok(path) => { let mut buffer = Vec::new(); - File::open(path) + File::open(&path) .with_context(|| format!("Failed to open file at {url}"))? // Read a byte more than the limit differentiate an expected EOF from hitting the limit .take(RESOURCE_READ_LIMIT + 1) @@ -154,7 +176,15 @@ pub fn read_url(url: &Url, access: ResourceAccess) -> Result> { "Contents of {url} exceeded {RESOURCE_READ_LIMIT}, rejected", )) } else { - Ok(buffer) + let mime_type = mime_guess::from_path(&path).first(); + if mime_type.is_none() { + event!( + Level::DEBUG, + "Failed to guess mime type from {}", + path.display() + ); + } + Ok((mime_type, buffer)) } } Err(_) => Err(anyhow!("Cannot convert URL {url} to file path")), @@ -194,6 +224,19 @@ mod tests { assert!(ResourceAccess::RemoteAllowed.permits(&resource)); } + #[test] + fn read_url_with_local_path_returns_content_type() { + let cwd = Url::from_directory_path(std::env::current_dir().unwrap()).unwrap(); + + let resource = cwd.join("sample/rust-logo.svg").unwrap(); + let (mime_type, _) = read_url(&resource, ResourceAccess::LocalOnly).unwrap(); + assert_eq!(mime_type, Some(mime::IMAGE_SVG)); + + let resource = cwd.join("sample/rust-logo-128x128.png").unwrap(); + let (mime_type, _) = read_url(&resource, ResourceAccess::LocalOnly).unwrap(); + assert_eq!(mime_type, Some(mime::IMAGE_PNG)); + } + #[test] fn read_url_with_http_url_fails_if_local_only_access() { let url = "https://eu.httpbin.org/status/404" @@ -228,17 +271,33 @@ mod tests { .unwrap(); let result = read_url(&url, ResourceAccess::RemoteAllowed); assert!(result.is_ok(), "Unexpected error: {result:?}"); - assert_eq!(result.unwrap().len(), 100); + let (_, contents) = result.unwrap(); + assert_eq!(contents.len(), 100); + } + + #[test] + fn read_url_with_http_url_returns_content_type() { + let mut url = "https://eu.httpbin.org/response-headers" + .parse::() + .unwrap(); + url.query_pairs_mut() + .append_pair("Content-Type", "image/svg+xml"); + let result = read_url(&url, ResourceAccess::RemoteAllowed); + assert!(result.is_ok(), "Unexpected error: {result:?}"); + let (mime_type, _) = result.unwrap(); + assert_eq!(mime_type, Some(mime::IMAGE_SVG)); } #[test] fn read_url_with_http_url_fails_when_size_limit_is_exceeded() { - let url = "https://eu.httpbin.org/response-headers?content-length=115343400" + let mut url = "https://eu.httpbin.org/response-headers" .parse::() .unwrap(); + url.query_pairs_mut() + .append_pair("Content-Length", "115343400"); let result = read_url(&url, ResourceAccess::RemoteAllowed); assert!(result.is_err(), "Unexpected success: {result:?}"); let error = format!("{:#}", result.unwrap_err()); - assert_eq!(error, "https://eu.httpbin.org/response-headers?content-length=115343400 reports size 115343400 which exceeds limit 104857600, refusing to read") + assert_eq!(error, "https://eu.httpbin.org/response-headers?Content-Length=115343400 reports size 115343400 which exceeds limit 104857600, refusing to read") } } diff --git a/src/terminal/capabilities/iterm2.rs b/src/terminal/capabilities/iterm2.rs index 95757f6a..b26a1ce2 100644 --- a/src/terminal/capabilities/iterm2.rs +++ b/src/terminal/capabilities/iterm2.rs @@ -18,7 +18,7 @@ use url::Url; use crate::resources::read_url; use crate::svg; use crate::terminal::osc::write_osc; -use crate::{magic, ResourceAccess}; +use crate::ResourceAccess; /// Iterm2 marks. #[derive(Debug, Copy, Clone)] @@ -66,8 +66,8 @@ impl ITerm2Images { /// Render the binary content of the (rendered) image or an IO error if /// reading or rendering failed. pub fn read_and_render(self, url: &Url, access: ResourceAccess) -> Result> { - let contents = read_url(url, access)?; - if magic::is_svg(&contents) { + let (mime_type, contents) = read_url(url, access)?; + if mime_type == Some(mime::IMAGE_SVG) { svg::render_svg(&contents).with_context(|| format!("Failed to render SVG at URL {url}")) } else { Ok(contents) diff --git a/src/terminal/capabilities/kitty.rs b/src/terminal/capabilities/kitty.rs index 7e40a209..d2a78a8e 100644 --- a/src/terminal/capabilities/kitty.rs +++ b/src/terminal/capabilities/kitty.rs @@ -28,7 +28,7 @@ use url::Url; use crate::resources::read_url; use crate::svg::render_svg; use crate::terminal::size::PixelSize; -use crate::{magic, ResourceAccess}; +use crate::ResourceAccess; /// Provides access to printing images for kitty. #[derive(Debug, Copy, Clone)] @@ -114,8 +114,8 @@ impl KittyImages { access: ResourceAccess, terminal_size: PixelSize, ) -> Result { - let contents = read_url(url, access)?; - let image = if magic::is_svg(&contents) { + let (mime_type, contents) = read_url(url, access)?; + let image = if mime_type == Some(mime::IMAGE_SVG) { image::load_from_memory( &render_svg(&contents) .with_context(|| format!("Failed to render SVG at {url} to PNG"))?, @@ -126,7 +126,9 @@ impl KittyImages { .with_context(|| format!("Failed to load image from URL {url}"))? }; - if magic::is_png(&contents) && PixelSize::from_xy(image.dimensions()) <= terminal_size { + if mime_type == Some(mime::IMAGE_PNG) + && PixelSize::from_xy(image.dimensions()) <= terminal_size + { Ok(self.render_as_png(contents)) } else { Ok(self.render_as_rgb_or_rgba(image, terminal_size))