From 2e7ca7b19beeec95589196faa022b733cc5fec31 Mon Sep 17 00:00:00 2001 From: Sebastian Wiesner Date: Tue, 21 Feb 2023 20:26:27 +0100 Subject: [PATCH] Do not sniff mime types Instead rely on the Content-Type header (remote resources) and the file extension (local resources) to identify SVG images. This removes the dependency on the file tool and on libmagic, and also the entire magic module, which simplifies the code and the build a bit, and removes a runtime dependency. It also effectively enables SVG images on Windows which so far did mostly not work because Windows neither includes the file tool nor libmagic, so users had to go out of their way and install file/libmagic to get SVG images support on Windows. --- .github/workflows/test.yml | 2 - CHANGELOG.md | 6 +- Cargo.lock | 35 ++--- Cargo.toml | 2 +- README.md | 18 +-- mdcat.1.adoc | 8 +- src/lib.rs | 1 - src/magic.rs | 201 ---------------------------- src/resources.rs | 83 ++++++++++-- src/terminal/capabilities/iterm2.rs | 6 +- src/terminal/capabilities/kitty.rs | 10 +- 11 files changed, 105 insertions(+), 267 deletions(-) delete mode 100644 src/magic.rs diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index f63a9998..ba2a1545 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -68,8 +68,6 @@ jobs: include: - target: x86_64-unknown-linux-gnu os: ubuntu-latest - # On Linux link against libmagic, since it's almost always available - cargo_opts: "--features magic" - target: x86_64-unknown-linux-musl os: ubuntu-latest cargo_opts: "--no-default-features --features static" diff --git a/CHANGELOG.md b/CHANGELOG.md index 1fc89f6a..f0f35f34 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,17 +8,17 @@ Use `cargo release` to create a new release. ## [Unreleased] -### Added -- With `--features magic` use `libmagic` to check for SVG data, instead of shelling out to `file` (see [GH-236]). - ### Changed - Update all dependencies. +- No longer sniff mime type from contents to identify SVG images. + Instead rely on the `Content-Type` header for HTTP(S) images and the file extension for local resources (see [GH-239]). ### Fixed - Use `less -r` instead of `less -R` in `mdless` if both `$PAGER` and `$MDCAT_PAGER` are unset (see [GH-238]). [GH-236]: https://github.com/swsnr/mdcat/pull/236 [GH-238]: https://github.com/swsnr/mdcat/issues/238 +[GH-239]: https://github.com/swsnr/mdcat/pull/239 ## [1.0.0] – 2023-01-07 diff --git a/Cargo.lock b/Cargo.lock index 338ae379..3b5402e3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -811,29 +811,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "magic" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87142e3acb1f4daa62eaea96605421a534119d4777a9fb43fb2784798fd89665" -dependencies = [ - "bitflags", - "errno", - "libc", - "magic-sys", - "thiserror", -] - -[[package]] -name = "magic-sys" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eff86ae08895140d628119d407d568f3b657145ee8c265878064f717534bb3bc" -dependencies = [ - "libc", - "vcpkg", -] - [[package]] name = "matchers" version = "0.1.0" @@ -858,8 +835,8 @@ dependencies = [ "image", "lazy_static", "libc", - "magic", "mime", + "mime_guess", "once_cell", "pretty_assertions", "pulldown-cmark", @@ -894,6 +871,16 @@ version = "0.3.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2a60c7ce501c71e03a9c9c0d35b861413ae925bd979cc7a4e30d060069aaac8d" +[[package]] +name = "mime_guess" +version = "2.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4192263c238a5f0d0c6bfd21f336a313a4ce1c450542449ca191bb657b4642ef" +dependencies = [ + "mime", + "unicase", +] + [[package]] name = "miniz_oxide" version = "0.6.2" diff --git a/Cargo.toml b/Cargo.toml index 3bf33202..90fe975a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,6 +28,7 @@ env_proxy = "0.4.1" gethostname = "0.4.1" image = "0.24.5" mime = "0.3.16" +mime_guess = "2.0.4" pulldown-cmark = { version = "0.9.2", default-features = false, features = ['simd'] } shell-words = "1.1.0" tracing = { version = "0.1.37", features = ["release_max_level_warn"] } @@ -35,7 +36,6 @@ tracing-subscriber = { version = "0.3.16", features = ["env-filter"] } url = "2.3.1" reqwest = { version = "0.11.14", default-features = false, features = ["gzip", "brotli", "blocking"]} once_cell = "1.17.0" -magic = { version = "0.13.0", optional = true } [dependencies.syntect] version = "5.0.0" diff --git a/README.md b/README.md index 0a5de3f1..acb0f3ee 100644 --- a/README.md +++ b/README.md @@ -72,6 +72,12 @@ Try `mdcat --help` or read the [mdcat(1)](./mdcat.1.adoc) manpage. * [Chocolatey]: `choco install mdcat` * You can also build `mdcat` manually with `cargo install mdcat`. +[Homebrew]: https://brew.sh +[MacPorts]: https://www.macports.org +[Arch Linux]: https://www.archlinux.org/packages/community/x86_64/mdcat/ +[scoop]: https://github.com/lukesampson/scoop +[chocolatey]: https://github.com/chocolatey + ## Building Run `cargo build --release`. @@ -79,8 +85,6 @@ The resulting `mdcat` executable links against the system's SSL library, i.e. op To build a self-contained executable use `cargo build --features=static`; the resulting executable uses a pure Rust SSL implementation. It still uses the system's CA roots however. -Pass `--features magic` to use `libmagic` instead of the `file` program to check mimetypes. - The build process also generates the following additional files in `$OUT_DIR`: * Completions for Bash, Zsh, Fish, and Powershell, for both `mdcat` and `mdless`, in `completions` sub-directory. @@ -92,16 +96,6 @@ If you package mdcat you may want to include these files too. [AsciiDoctor]: https://asciidoctor.org/ -## Requirements - -If `mdcat` wasn't built with the `magic` feature, image type detection requires the `file` tool with support for `--brief` and `--mime-type` flags to be available in `$PATH`. - -[Homebrew]: https://brew.sh -[MacPorts]: https://www.macports.org -[Arch Linux]: https://www.archlinux.org/packages/community/x86_64/mdcat/ -[scoop]: https://github.com/lukesampson/scoop -[chocolatey]: https://github.com/chocolatey - ## Troubleshooting `mdcat` can output extensive tracing information when asked to. diff --git a/mdcat.1.adoc b/mdcat.1.adoc index a0ccda66..3737afb0 100644 --- a/mdcat.1.adoc +++ b/mdcat.1.adoc @@ -53,10 +53,7 @@ In particular this disables all image support which relies on proprietary escape In iTerm2, Kitty, Terminology and WezTerm mdcat prints inline images. mdcat supports most standard pixel formats by default. -mdcat silently ignores images larger than 100 MiB. - -For some terminals mdcat needs to detect the mimetype. -If `mdcat` was not built with `libmagic` (see output of `--version` to check) it uses the `file --brief --mime-type` for this purpose, and thus requires a compatible `file` utility in `$PATH`. +mdcat silently ignores images larger than 100 MiB, under the assumption that images of that size cannot reasonably be rendered in a terminal. === SVG support @@ -65,6 +62,9 @@ In Terminology mdcat also renders SVG images, using the built-in support of Term In iTerm2, Kitty and WezTerm mdcat requires `rsvg-convert` to render SVG images to pixel graphics before displaying them; if `rsvg-convert` is not found in `$PATH` mdcat does not render SVG images in these terminals. +For local SVG files mdcat relies on the file extension to identify SVG images. +For remote images from HTTP(S) URLs mdcat inspects the `Content-Type` header to identify SVG images. + === HTTP/HTTPS support mdcat fetches images from HTTP(S) URLs for rendering if the underlying terminal supports image rendering; diff --git a/src/lib.rs b/src/lib.rs index 4df8b794..5d93f3dd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -22,7 +22,6 @@ use crate::terminal::capabilities::TerminalCapabilities; use crate::terminal::TerminalSize; use url::Url; -mod magic; mod references; mod resources; mod svg; diff --git a/src/magic.rs b/src/magic.rs deleted file mode 100644 index 9dc0183d..00000000 --- a/src/magic.rs +++ /dev/null @@ -1,201 +0,0 @@ -// Copyright 2018-2020 Sebastian Wiesner - -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at http://mozilla.org/MPL/2.0/. - -//! Magic util functions for detecting image types. - -use anyhow::Result; -use anyhow::{anyhow, Context}; -use mime::Mime; -use tracing::{event, Level}; - -/// Whether the given data is SVG data. -pub fn is_svg(buffer: &[u8]) -> bool { - // If `buffer` contains a valid pixel image it's definitely not an SVG, so - // let's avoid the expensive call to "file" here. - image::guess_format(buffer).is_err() - && get_mimetype_for_buffer(buffer).map_or_else( - |error| { - event!( - Level::WARN, - ?error, - "failed to determine mime type: {}", - error - ); - false - }, - |detected| detected == mime::IMAGE_SVG, - ) -} - -/// Whether the given data is a PNG image. -pub fn is_png(buffer: &[u8]) -> bool { - match image::guess_format(buffer) { - Ok(image::ImageFormat::Png) => true, - Ok(_) => false, - Err(error) => { - event!( - Level::WARN, - ?error, - "failed to guess image format: {}", - error - ); - false - } - } -} - -#[cfg(feature = "magic")] -mod cookie { - use anyhow::{Context, Result}; - use magic::{Cookie, CookieFlags}; - use once_cell::sync::Lazy; - use tracing::{event, Level}; - - fn create() -> Result { - let flags = CookieFlags::MIME_TYPE - | CookieFlags::ERROR - // Disable all the things we don't need for SVG checking - | CookieFlags::NO_CHECK_COMPRESS - | CookieFlags::NO_CHECK_TAR - | CookieFlags::NO_CHECK_APPTYPE - | CookieFlags::NO_CHECK_ELF - | CookieFlags::NO_CHECK_ENCODING - | CookieFlags::NO_CHECK_JSON - | CookieFlags::NO_CHECK_CSV; - let cookie = - Cookie::open(flags).with_context(|| "Failed to create magic cookie".to_string())?; - cookie - .load::<&str>(Default::default()) - .with_context(|| "Failed to load system magic database into cookie".to_string())?; - Ok(cookie) - } - - fn create_or_log() -> Option { - match create() { - Ok(cookie) => Some(cookie), - Err(error) => { - event!( - Level::ERROR, - ?error, - "Failed to initialize magic cookie: {}", - error - ); - None - } - } - } - - thread_local!(pub static DEFAULT: Lazy> = Lazy::new(create_or_log)); -} - -#[cfg(feature = "magic")] -fn get_mimetype_for_buffer(buffer: &[u8]) -> Result { - cookie::DEFAULT.with(|cookie| match cookie.as_ref() { - Some(cookie) => { - let result = cookie - .buffer(buffer) - .with_context(|| "Failed to check mimetype of buffer with magic cookie")?; - result - .parse::() - .with_context(|| format!("Failed to parse mime type from result: {result}")) - } - None => { - event!( - Level::DEBUG, - "Magic cookie not available, falling back to 'file' program" - ); - get_mimetype_for_buffer_with_file(buffer) - } - }) -} - -#[cfg(not(feature = "magic"))] -fn get_mimetype_for_buffer(buffer: &[u8]) -> Result { - get_mimetype_for_buffer_with_file(buffer) -} - -fn get_mimetype_for_buffer_with_file(buffer: &[u8]) -> Result { - use std::io::prelude::*; - use std::io::ErrorKind; - use std::process::*; - - let mut process = Command::new("file") - .arg("--brief") - .arg("--mime-type") - .arg("-") - .stdin(Stdio::piped()) - .stdout(Stdio::piped()) - .stderr(Stdio::piped()) - .spawn() - .with_context(|| "Failed to spawn mime --brief --mime-type")?; - - process - .stdin - .as_mut() - .expect("Forgot to pipe stdin?") - .write_all(buffer) - .or_else(|error| match error.kind() { - ErrorKind::BrokenPipe => Ok(()), - _ => Err(error), - })?; - - let output = process - .wait_with_output() - .with_context(|| "Failed to read output from mime --brief --mime-type")?; - if output.status.success() { - let stdout = std::str::from_utf8(&output.stdout) - .with_context(|| { - format!( - "mime --brief --mime-type returned non-utf8: {:?}", - output.stdout - ) - })? - .trim(); - let detected_type = stdout - .parse::() - .with_context(|| format!("Failed to parse mime type from output: {stdout}"))?; - Ok(detected_type) - } else { - Err(anyhow!( - "file --brief --mime-type failed with status {}: {}", - output.status, - String::from_utf8_lossy(&output.stderr) - )) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn detect_mimetype_of_png_image() { - let data = include_bytes!("../sample/rust-logo-128x128.png"); - assert!(is_png(data)); - } - - #[test] - fn detect_mimetype_of_svg_image() { - let data = include_bytes!("../sample/rust-logo.svg"); - assert!(is_svg(data)); - } - - #[test] - fn detect_mimetype_of_magic_param_bytes_max_length() { - let data = std::iter::repeat(b'\0') - .take(1_048_576) - .collect::>(); - assert!(!is_svg(&data)); - } - - #[test] - fn detect_mimetype_of_larger_than_magic_param_bytes_max_length() { - let data = std::iter::repeat(b'\0') - .take(1_048_576 * 2) - .collect::>(); - assert!(!is_svg(&data)); - } -} diff --git a/src/resources.rs b/src/resources.rs index 6b675a6a..7e7b557a 100644 --- a/src/resources.rs +++ b/src/resources.rs @@ -6,12 +6,17 @@ //! Access to resources referenced from markdown documents. -use anyhow::{anyhow, Context, Result}; -use once_cell::sync::Lazy; -use reqwest::blocking::{Client, ClientBuilder}; use std::fs::File; use std::io::prelude::*; use std::time::Duration; + +use anyhow::{anyhow, Context, Result}; +use mime::Mime; +use once_cell::sync::Lazy; +use reqwest::{ + blocking::{Client, ClientBuilder}, + header::CONTENT_TYPE, +}; use tracing::{event, Level}; use url::Url; @@ -72,7 +77,7 @@ fn is_local(url: &Url) -> bool { /// Read size limit for resources. static RESOURCE_READ_LIMIT: u64 = 104_857_600; -fn fetch_http(url: &Url) -> Result> { +fn fetch_http(url: &Url) -> Result<(Option, Vec)> { let response = CLIENT .as_ref() .with_context(|| "HTTP client not available".to_owned())? @@ -81,6 +86,23 @@ fn fetch_http(url: &Url) -> Result> { .with_context(|| format!("Failed to GET {url}"))? .error_for_status()?; + let content_type = response.headers().get(CONTENT_TYPE); + event!( + Level::DEBUG, + "Raw Content-Type of remote resource {}: {:?}", + &url, + &content_type + ); + let mime_type = content_type + .and_then(|v| v.to_str().ok()) + .and_then(|v| v.parse::().ok()); + event!( + Level::DEBUG, + "Parsed Content-Type of remote resource {}: {:?}", + &url, + &mime_type + ); + match response.content_length() { // The server gave us no content size so read until the end of the stream, but not more than our read limit. None => { @@ -97,7 +119,7 @@ fn fetch_http(url: &Url) -> Result> { "Contents of {url} exceeded {RESOURCE_READ_LIMIT}, rejected", )) } else { - Ok(buffer) + Ok((mime_type, buffer)) } } // If we've got a content-size use it to read exactly as many bytes as the server told us to do (within limits) @@ -114,7 +136,7 @@ fn fetch_http(url: &Url) -> Result> { .read_exact(buffer.as_mut_slice()) .with_context(|| format!("Failed to read from {url}"))?; - Ok(buffer) + Ok((mime_type, buffer)) } } } @@ -130,7 +152,7 @@ fn fetch_http(url: &Url) -> Result> { /// /// We currently support `file:` URLs which the underlying operation system can /// read (local on UNIX, UNC paths on Windows), and HTTP(S) URLs. -pub fn read_url(url: &Url, access: ResourceAccess) -> Result> { +pub fn read_url(url: &Url, access: ResourceAccess) -> Result<(Option, Vec)> { if !access.permits(url) { return Err(anyhow!( "Access denied to URL {} by policy {:?}", @@ -142,7 +164,7 @@ pub fn read_url(url: &Url, access: ResourceAccess) -> Result> { "file" => match url.to_file_path() { Ok(path) => { let mut buffer = Vec::new(); - File::open(path) + File::open(&path) .with_context(|| format!("Failed to open file at {url}"))? // Read a byte more than the limit differentiate an expected EOF from hitting the limit .take(RESOURCE_READ_LIMIT + 1) @@ -154,7 +176,15 @@ pub fn read_url(url: &Url, access: ResourceAccess) -> Result> { "Contents of {url} exceeded {RESOURCE_READ_LIMIT}, rejected", )) } else { - Ok(buffer) + let mime_type = mime_guess::from_path(&path).first(); + if mime_type.is_none() { + event!( + Level::DEBUG, + "Failed to guess mime type from {}", + path.display() + ); + } + Ok((mime_type, buffer)) } } Err(_) => Err(anyhow!("Cannot convert URL {url} to file path")), @@ -194,6 +224,19 @@ mod tests { assert!(ResourceAccess::RemoteAllowed.permits(&resource)); } + #[test] + fn read_url_with_local_path_returns_content_type() { + let cwd = Url::from_directory_path(std::env::current_dir().unwrap()).unwrap(); + + let resource = cwd.join("sample/rust-logo.svg").unwrap(); + let (mime_type, _) = read_url(&resource, ResourceAccess::LocalOnly).unwrap(); + assert_eq!(mime_type, Some(mime::IMAGE_SVG)); + + let resource = cwd.join("sample/rust-logo-128x128.png").unwrap(); + let (mime_type, _) = read_url(&resource, ResourceAccess::LocalOnly).unwrap(); + assert_eq!(mime_type, Some(mime::IMAGE_PNG)); + } + #[test] fn read_url_with_http_url_fails_if_local_only_access() { let url = "https://eu.httpbin.org/status/404" @@ -228,17 +271,33 @@ mod tests { .unwrap(); let result = read_url(&url, ResourceAccess::RemoteAllowed); assert!(result.is_ok(), "Unexpected error: {result:?}"); - assert_eq!(result.unwrap().len(), 100); + let (_, contents) = result.unwrap(); + assert_eq!(contents.len(), 100); + } + + #[test] + fn read_url_with_http_url_returns_content_type() { + let mut url = "https://eu.httpbin.org/response-headers" + .parse::() + .unwrap(); + url.query_pairs_mut() + .append_pair("Content-Type", "image/svg+xml"); + let result = read_url(&url, ResourceAccess::RemoteAllowed); + assert!(result.is_ok(), "Unexpected error: {result:?}"); + let (mime_type, _) = result.unwrap(); + assert_eq!(mime_type, Some(mime::IMAGE_SVG)); } #[test] fn read_url_with_http_url_fails_when_size_limit_is_exceeded() { - let url = "https://eu.httpbin.org/response-headers?content-length=115343400" + let mut url = "https://eu.httpbin.org/response-headers" .parse::() .unwrap(); + url.query_pairs_mut() + .append_pair("Content-Length", "115343400"); let result = read_url(&url, ResourceAccess::RemoteAllowed); assert!(result.is_err(), "Unexpected success: {result:?}"); let error = format!("{:#}", result.unwrap_err()); - assert_eq!(error, "https://eu.httpbin.org/response-headers?content-length=115343400 reports size 115343400 which exceeds limit 104857600, refusing to read") + assert_eq!(error, "https://eu.httpbin.org/response-headers?Content-Length=115343400 reports size 115343400 which exceeds limit 104857600, refusing to read") } } diff --git a/src/terminal/capabilities/iterm2.rs b/src/terminal/capabilities/iterm2.rs index 95757f6a..b26a1ce2 100644 --- a/src/terminal/capabilities/iterm2.rs +++ b/src/terminal/capabilities/iterm2.rs @@ -18,7 +18,7 @@ use url::Url; use crate::resources::read_url; use crate::svg; use crate::terminal::osc::write_osc; -use crate::{magic, ResourceAccess}; +use crate::ResourceAccess; /// Iterm2 marks. #[derive(Debug, Copy, Clone)] @@ -66,8 +66,8 @@ impl ITerm2Images { /// Render the binary content of the (rendered) image or an IO error if /// reading or rendering failed. pub fn read_and_render(self, url: &Url, access: ResourceAccess) -> Result> { - let contents = read_url(url, access)?; - if magic::is_svg(&contents) { + let (mime_type, contents) = read_url(url, access)?; + if mime_type == Some(mime::IMAGE_SVG) { svg::render_svg(&contents).with_context(|| format!("Failed to render SVG at URL {url}")) } else { Ok(contents) diff --git a/src/terminal/capabilities/kitty.rs b/src/terminal/capabilities/kitty.rs index 7e40a209..d2a78a8e 100644 --- a/src/terminal/capabilities/kitty.rs +++ b/src/terminal/capabilities/kitty.rs @@ -28,7 +28,7 @@ use url::Url; use crate::resources::read_url; use crate::svg::render_svg; use crate::terminal::size::PixelSize; -use crate::{magic, ResourceAccess}; +use crate::ResourceAccess; /// Provides access to printing images for kitty. #[derive(Debug, Copy, Clone)] @@ -114,8 +114,8 @@ impl KittyImages { access: ResourceAccess, terminal_size: PixelSize, ) -> Result { - let contents = read_url(url, access)?; - let image = if magic::is_svg(&contents) { + let (mime_type, contents) = read_url(url, access)?; + let image = if mime_type == Some(mime::IMAGE_SVG) { image::load_from_memory( &render_svg(&contents) .with_context(|| format!("Failed to render SVG at {url} to PNG"))?, @@ -126,7 +126,9 @@ impl KittyImages { .with_context(|| format!("Failed to load image from URL {url}"))? }; - if magic::is_png(&contents) && PixelSize::from_xy(image.dimensions()) <= terminal_size { + if mime_type == Some(mime::IMAGE_PNG) + && PixelSize::from_xy(image.dimensions()) <= terminal_size + { Ok(self.render_as_png(contents)) } else { Ok(self.render_as_rgb_or_rgba(image, terminal_size))