From 564599c1e1d948f8477e1456a1d51e55588a68b3 Mon Sep 17 00:00:00 2001
From: Dennis Snell <dennis.snell@automattic.com>
Date: Fri, 25 Aug 2023 19:51:35 -0500
Subject: [PATCH] WIP: Refactor ImageCDN parsing to rely on HTML API instead of
 RegExps

The introduction of the HTML API into WordPress 6.2 offers a new method
of matching and modifying HTML. In this patch we're replacing code that
attempts to parse the input HTML and extract images that are direct
children of an anchor ("A" tag), then read and modify them based on
the values of their attributes and computed Photon properties.

In the previous code the `Image_CDN` class scanned the entire HTML
document to generate a list of PREG image match objects, then iterated
over those matches and performed string-replace operations on them.

Now the class does a pass from start to finish, visiting each image
tag along the way, and making the appropriate modifications. Extra
care is taken to ensure that only images that are the single child
of a link are matched.

In this change the value of the `tag` key in some of the filters
has changed from the initial matched HTML snippet to the name of the
image tag, which could be `IMG` or `AMP-IMG` or `AMP-ANIM`. An update
to the Tag Processor or a custom sub-class thereof could provide the
original HTML snippet and match the existing behavior, but that hasn't
been done in this patch yet given the author's uncertainty about the
use and value of those snippets.
---
 .../image-cdn/src/class-image-cdn.php         | 555 ++++++++++--------
 1 file changed, 311 insertions(+), 244 deletions(-)

diff --git a/projects/packages/image-cdn/src/class-image-cdn.php b/projects/packages/image-cdn/src/class-image-cdn.php
index 6d0e58f59764b..5a54072aec812 100644
--- a/projects/packages/image-cdn/src/class-image-cdn.php
+++ b/projects/packages/image-cdn/src/class-image-cdn.php
@@ -279,9 +279,40 @@ public static function filter_photon_norezise_maybe_inject_sizes_api( $sizes, $a
 	 * * IN-CONTENT IMAGE MANIPULATION FUNCTIONS
 	 **/
 
+	/**
+	 * Given a `class` attribute value, return the first class name with
+	 * the given prefix, excluding that prefix in the return value.
+	 *
+	 * @since {VERSION NUMBER}
+	 *
+	 * Example:
+	 *
+	 *     'full-width' = self::parse_class_name_prefixed_by( 'id-5 size-full-width duotone-gray size-wide', 'size-' );
+	 *     null         = self::parse_class_name_prefixed_by( 'id-5 size-full-width duotone-gray size-wide', 'lazy-' );
+	 *     'id-5'       = self::parse_class_name_prefixed_by( 'id-5 size-full-width duotone-gray size-wide', '' );
+	 *
+	 * @param string $class  Value of an HTML `class` attribute.
+	 * @param string $prefix A returned class name must start with this prefix.
+	 * @return string|null part of a class name that follows the given prefix if found, else null.
+	 */
+	public static function parse_class_name_prefixed_by( $class, $prefix ) {
+		if ( ! is_string( $class ) || ! is_string( $prefix ) ) {
+			return null;
+		}
+
+		// Quote for the `#` delimiter explicitly: preg_quote() only escapes `#` by default on PHP 7.3+.
+		$search_pattern = sprintf( '#(?:^|[ \t\f\r\n])%s(?P<value>[^ \t\f\r\n]+)#', preg_quote( $prefix, '#' ) );
+		if ( preg_match( $search_pattern, $class, $match ) ) {
+			return $match['value'];
+		}
+		return null;
+	}
+
 	/**
 	 * Match all images and any relevant <a> tags in a block of HTML.
 	 *
+	 * @deprecated since {VERSION NUMBER}
+	 *
 	 * @param string $content Some HTML.
 	 * @return array An array of $images matches, where $images[0] is
 	 *         an array of full matches, and the link_url, img_tag,
@@ -336,297 +367,333 @@ public static function parse_dimensions_from_filename( $src ) {
 	 * @return string
 	 */
 	public static function filter_the_content( $content ) {
-		$images = self::parse_images_from_html( $content );
-
-		if ( ! empty( $images ) ) {
-			$content_width = Image_CDN_Core::get_jetpack_content_width();
-
-			$image_sizes = self::image_sizes();
-
-			$upload_dir = wp_get_upload_dir();
-
-			foreach ( $images[0] as $index => $tag ) {
-				// Default to resize, though fit may be used in certain cases where a dimension cannot be ascertained.
-				$transform = 'resize';
-
-				// Start with a clean attachment ID each time.
-				$attachment_id = false;
-
-				// Flag if we need to munge a fullsize URL.
-				$fullsize_url = false;
+		// @TODO: Do "PICTURE" and "SOURCE" belong here as well?
+		static $image_tags = array( 'IMG', 'AMP-IMG', 'AMP-ANIM' );
+		$content_width     = null;
+		$image_sizes       = null;
+		$upload_dir        = null;
+		$p                 = new WP_HTML_Tag_Processor( $content );
+
+		// Visit every image that is a direct child of a link.
+		while ( $p->next_tag( 'A' ) ) {
+			$p->set_bookmark( 'link' );
+			$link_href = $p->get_attribute( 'href' );
+			if ( ! is_string( $link_href ) || empty( $link_href ) ) {
+				continue;
+			}
 
-				// Identify image source.
-				$src_orig = $images['img_url'][ $index ];
-				$src      = $src_orig;
+			// If there are no more tags then there is nothing left to do.
+			if ( ! $p->next_tag( array( 'tag_closers' => 'visit' ) ) ) {
+				break;
+			}
 
-				/**
-				 * Allow specific images to be skipped by Photon.
-				 *
-				 * @module photon
-				 *
-				 * @since 2.0.3
-				 *
-				 * @param bool false Should Photon ignore this image. Default to false.
-				 * @param string $src Image URL.
-				 * @param string $tag Image Tag (Image HTML output).
-				 */
-				if ( apply_filters( 'jetpack_photon_skip_image', false, $src, $tag ) ) {
-					continue;
-				}
+			/*
+			 * Only examine tags that are considered an image and which are the
+			 * first node directly inside the link. If encountering a closer
+			 * then the tag is not the image being sought.
+			 */
+			if ( $p->is_tag_closer() || ! in_array( $p->get_tag(), $image_tags, true ) ) {
+				continue;
+			}
 
-				// Support Automattic's Lazy Load plugin.
-				// Can't modify $tag yet as we need unadulterated version later.
-				if ( preg_match( '#data-lazy-src=["\'](.+?)["\']#i', $images['img_tag'][ $index ], $lazy_load_src ) ) {
-					$placeholder_src_orig = $src;
-					$placeholder_src      = $placeholder_src_orig;
-					$src_orig             = $lazy_load_src[1];
-					$src                  = $src_orig;
-				} elseif ( preg_match( '#data-lazy-original=["\'](.+?)["\']#i', $images['img_tag'][ $index ], $lazy_load_src ) ) {
-					$placeholder_src_orig = $src;
-					$placeholder_src      = $placeholder_src_orig;
-					$src_orig             = $lazy_load_src[1];
-					$src                  = $src_orig;
-				}
+			$p->set_bookmark( 'image' );
 
-				// Check if image URL should be used with Photon.
-				if ( self::validate_image_url( $src ) ) {
-					// Find the width and height attributes.
-					$width  = false;
-					$height = false;
+			/*
+			 * At this point a target image has been found. Initialize the
+			 * shared data and then process each image as it appears.
+			 */
+			if ( null === $content_width ) {
+				$content_width = Image_CDN_Core::get_jetpack_content_width();
+				$image_sizes   = self::image_sizes();
+				$upload_dir    = wp_get_upload_dir();
+			}
 
-					// First, check the image tag. Note we only check for pixel sizes now; HTML4 percentages have never been correctly
-					// supported, so we stopped pretending to support them in JP 9.1.0.
-					if ( preg_match( '#[\s"\']width=["\']?([\d%]+)["\']?#i', $images['img_tag'][ $index ], $width_string ) ) {
-						$width = false === strpos( $width_string[1], '%' ) ? $width_string[1] : false;
-					}
+			// @TODO: Do the filters need the full HTML or is it enough to pass the tag name, e.g. 'AMP-IMG'?
+			$tag = $p->get_tag();
 
-					if ( preg_match( '#[\s"\']height=["\']?([\d%]+)["\']?#i', $images['img_tag'][ $index ], $height_string ) ) {
-						$height = false === strpos( $height_string[1], '%' ) ? $height_string[1] : false;
-					}
+			// Default to resize, though fit may be used in certain cases where a dimension cannot be ascertained.
+			$transform = 'resize';
 
-					// Detect WP registered image size from HTML class.
-					if ( preg_match( '#class=["\']?[^"\']*size-([^"\'\s]+)[^"\']*["\']?#i', $images['img_tag'][ $index ], $size ) ) {
-						$size = array_pop( $size );
+			// Flag if we need to munge a fullsize URL.
+			$fullsize_url = false;
 
-						if ( false === $width && false === $height && 'full' !== $size && array_key_exists( $size, $image_sizes ) ) {
-							$width     = (int) $image_sizes[ $size ]['width'];
-							$height    = (int) $image_sizes[ $size ]['height'];
-							$transform = $image_sizes[ $size ]['crop'] ? 'resize' : 'fit';
-						}
-					} else {
-						unset( $size );
-					}
+			// Identify image source.
+			$src_orig = $p->get_attribute( 'src' );
+			$src      = $src_orig;
 
-					// WP Attachment ID, if uploaded to this site.
-					if (
-						preg_match( '#class=["\']?[^"\']*wp-image-([\d]+)[^"\']*["\']?#i', $images['img_tag'][ $index ], $attachment_id ) &&
-						0 === strpos( $src, $upload_dir['baseurl'] ) &&
-						/**
-						 * Filter whether an image using an attachment ID in its class has to be uploaded to the local site to go through Photon.
-						 *
-						 * @module photon
-						 *
-						 * @since 2.0.3
-						 *
-						 * @param bool false Was the image uploaded to the local site. Default to false.
-						 * @param array $args {
-						 *   Array of image details.
-						 *
-						 *   @type $src Image URL.
-						 *   @type tag Image tag (Image HTML output).
-						 *   @type $images Array of information about the image.
-						 *   @type $index Image index.
-						 * }
-						 */
-						apply_filters( 'jetpack_photon_image_is_local', false, compact( 'src', 'tag', 'images', 'index' ) )
-					) {
-						$attachment_id = (int) array_pop( $attachment_id );
-
-						if ( $attachment_id ) {
-							$attachment = get_post( $attachment_id );
-
-							// Basic check on returned post object.
-							if ( is_object( $attachment ) && ! is_wp_error( $attachment ) && 'attachment' === $attachment->post_type ) {
-								$src_per_wp = wp_get_attachment_image_src( $attachment_id, isset( $size ) ? $size : 'full' );
-
-								if ( self::validate_image_url( $src_per_wp[0] ) ) {
-									$src          = $src_per_wp[0];
-									$fullsize_url = true;
-
-									// Prevent image distortion if a detected dimension exceeds the image's natural dimensions.
-									if ( ( false !== $width && $width > $src_per_wp[1] ) || ( false !== $height && $height > $src_per_wp[2] ) ) {
-										$width  = false === $width ? false : min( $width, $src_per_wp[1] );
-										$height = false === $height ? false : min( $height, $src_per_wp[2] );
-									}
-
-									// If no width and height are found, max out at source image's natural dimensions.
-									// Otherwise, respect registered image sizes' cropping setting.
-									if ( false === $width && false === $height ) {
-										$width     = $src_per_wp[1];
-										$height    = $src_per_wp[2];
-										$transform = 'fit';
-									} elseif ( isset( $size ) && array_key_exists( $size, $image_sizes ) && isset( $image_sizes[ $size ]['crop'] ) ) {
-										$transform = (bool) $image_sizes[ $size ]['crop'] ? 'resize' : 'fit';
-									}
-								}
-							} else {
-								unset( $attachment_id );
-								unset( $attachment );
-							}
-						}
-					}
+			/**
+			 * Allow specific images to be skipped by Photon.
+			 *
+			 * @TODO: Does this need to pass the full HTML of the image tag?
+			 *
+			 * @module photon
+			 *
+			 * @since 2.0.3
+			 * @since {VERSION NUMBER} Sends tag name instead of full tag HTML.
+			 *
+			 * @param bool false Should Photon ignore this image. Default to false.
+			 * @param string $src Image URL.
+			 * @param string $tag Image Tag (Image HTML output).
+			 */
+			if ( apply_filters( 'jetpack_photon_skip_image', false, $src, $tag ) ) {
+				continue;
+			}
 
-					// If image tag lacks width and height arguments, try to determine from strings WP appends to resized image filenames.
-					if ( false === $width && false === $height ) {
-						list( $width, $height ) = self::parse_dimensions_from_filename( $src );
-					}
+			$data_lazy_src      = $p->get_attribute( 'data-lazy-src' );
+			$data_lazy_original = $p->get_attribute( 'data-lazy-original' );
 
-					$width_orig     = $width;
-					$height_orig    = $height;
-					$transform_orig = $transform;
+			$source_type     = 'src';
+			$chosen_data_src = null;
 
-					// If width is available, constrain to $content_width.
-					if ( false !== $width && is_numeric( $content_width ) && $width > $content_width ) {
-						if ( false !== $height ) {
-							$height = round( ( $content_width * $height ) / $width );
-						}
-						$width = $content_width;
-					}
-
-					// Set a width if none is found and $content_width is available.
-					// If width is set in this manner and height is available, use `fit` instead of `resize` to prevent skewing.
-					if ( false === $width && is_numeric( $content_width ) ) {
-						$width = (int) $content_width;
+			// Prefer a URL from the `data-lazy-src` attribute.
+			if ( null === $chosen_data_src && is_string( $data_lazy_src ) && ! empty( $data_lazy_src ) ) {
+				$source_type     = 'data-lazy-src';
+				$chosen_data_src = $data_lazy_src;
+			}
 
-						if ( false !== $height ) {
-							$transform = 'fit';
-						}
-					}
+			// Fall back to a URL from the `data-lazy-original` attribute.
+			if ( null === $chosen_data_src && is_string( $data_lazy_original ) && ! empty( $data_lazy_original ) ) {
+				$source_type     = 'data-lazy-original';
+				$chosen_data_src = $data_lazy_original;
+			}
 
-					// Detect if image source is for a custom-cropped thumbnail and prevent further URL manipulation.
-					if ( ! $fullsize_url && preg_match_all( '#-e[a-z0-9]+(-\d+x\d+)?\.(' . implode( '|', self::$extensions ) . '){1}$#i', basename( $src ), $filename ) ) {
-						$fullsize_url = true;
-					}
+			// Update the src if one was provided in the `data-lazy-` attributes.
+			if ( 'src' !== $source_type ) {
+				$placeholder_src_orig = $src;
+				$placeholder_src      = $placeholder_src_orig;
+				$src_orig             = $chosen_data_src;
+				$src                  = $src_orig;
+			}
 
-					// Build URL, first maybe removing WP's resized string so we pass the original image to Photon.
-					if ( ! $fullsize_url && 0 === strpos( $src, $upload_dir['baseurl'] ) ) {
-						$src = self::strip_image_dimensions_maybe( $src );
-					}
+			// Check if image URL should be used with Photon.
+			if ( self::validate_image_url( $src ) ) {
+				$width  = $p->get_attribute( 'width' );
+				$height = $p->get_attribute( 'height' );
+
+				// First, check the image tag. Note we only check for pixel sizes now; HTML4 percentages have never been correctly
+				// supported, so we stopped pretending to support them in JP 9.1.0.
+				if ( ! is_string( $width ) || false !== strpos( $width, '%' ) ) {
+					$width = false;
+				}
 
-					// Build array of Photon args and expose to filter before passing to Photon URL function.
-					$args = array();
+				if ( ! is_string( $height ) || false !== strpos( $height, '%' ) ) {
+					$height = false;
+				}
 
-					if ( false !== $width && false !== $height ) {
-						$args[ $transform ] = $width . ',' . $height;
-					} elseif ( false !== $width ) {
-						$args['w'] = $width;
-					} elseif ( false !== $height ) {
-						$args['h'] = $height;
-					}
+				// Detect WP registered image size from HTML class; keep $size whenever the class names one.
+				$class = $p->get_attribute( 'class' );
+				$size  = self::parse_class_name_prefixed_by( $class, 'size-' );
+				if ( null === $size ) {
+					unset( $size );
+				} elseif ( false === $width && false === $height && 'full' !== $size && array_key_exists( $size, $image_sizes ) ) {
+					$width     = (int) $image_sizes[ $size ]['width'];
+					$height    = (int) $image_sizes[ $size ]['height'];
+					$transform = $image_sizes[ $size ]['crop'] ? 'resize' : 'fit';
+				}
 
+				// WP Attachment ID, if uploaded to this site.
+				$attachment_id = self::parse_class_name_prefixed_by( $class, 'wp-image-' );
+				$images        = array();
+				$index         = 0;
+				if (
+					$attachment_id &&
+					preg_match( '#^[1-9][0-9]*$#', $attachment_id ) &&
+					0 === strpos( $src, $upload_dir['baseurl'] ) &&
 					/**
-					 * Filter the array of Photon arguments added to an image when it goes through Photon.
-					 * By default, only includes width and height values.
+					 * Filter whether an image using an attachment ID in its class has to be uploaded to the local site to go through Photon.
 					 *
-					 * @see https://developer.wordpress.com/docs/photon/api/
+					 * @TODO: What is the point of passing $images and $index. Are they required?
 					 *
 					 * @module photon
 					 *
-					 * @since 2.0.0
+					 * @since 2.0.3
+					 * @since {VERSION NUMBER} Passes tag name instead of full tag HTML, does not pass $images array or $index.
 					 *
-					 * @param array $args Array of Photon Arguments.
-					 * @param array $details {
-					 *     Array of image details.
+					 * @param bool false Was the image uploaded to the local site. Default to false.
+					 * @param array $args {
+					 *   Array of image details.
 					 *
-					 *     @type string    $tag            Image tag (Image HTML output).
-					 *     @type string    $src            Image URL.
-					 *     @type string    $src_orig       Original Image URL.
-					 *     @type int|false $width          Image width.
-					 *     @type int|false $height         Image height.
-					 *     @type int|false $width_orig     Original image width before constrained by content_width.
-					 *     @type int|false $height_orig    Original Image height before constrained by content_width.
-					 *     @type string    $transform      Transform.
-					 *     @type string    $transform_orig Original transform before constrained by content_width.
+					 *   @type $src Image URL.
+					 *   @type tag Image tag (Image HTML output).
 					 * }
 					 */
-					$args = apply_filters( 'jetpack_photon_post_image_args', $args, compact( 'tag', 'src', 'src_orig', 'width', 'height', 'width_orig', 'height_orig', 'transform', 'transform_orig' ) );
+					apply_filters( 'jetpack_photon_image_is_local', false, compact( 'src', 'tag', 'images', 'index' ) )
+				) {
+					$attachment_id = (int) $attachment_id;
+					$attachment    = get_post( $attachment_id );
+
+					// Basic check on returned post object.
+					if ( is_object( $attachment ) && ! is_wp_error( $attachment ) && 'attachment' === $attachment->post_type ) {
+						$src_per_wp = wp_get_attachment_image_src( $attachment_id, isset( $size ) ? $size : 'full' );
+
+						if ( self::validate_image_url( $src_per_wp[0] ) ) {
+							$src          = $src_per_wp[0];
+							$fullsize_url = true;
+
+							// Prevent image distortion if a detected dimension exceeds the image's natural dimensions.
+							if ( ( false !== $width && $width > $src_per_wp[1] ) || ( false !== $height && $height > $src_per_wp[2] ) ) {
+								$width  = false === $width ? false : min( $width, $src_per_wp[1] );
+								$height = false === $height ? false : min( $height, $src_per_wp[2] );
+							}
 
-					$photon_url = Image_CDN_Core::cdn_url( $src, $args );
+							// If no width and height are found, max out at source image's natural dimensions.
+							// Otherwise, respect registered image sizes' cropping setting.
+							if ( false === $width && false === $height ) {
+								$width     = $src_per_wp[1];
+								$height    = $src_per_wp[2];
+								$transform = 'fit';
+							} elseif ( isset( $size ) && array_key_exists( $size, $image_sizes ) && isset( $image_sizes[ $size ]['crop'] ) ) {
+								$transform = (bool) $image_sizes[ $size ]['crop'] ? 'resize' : 'fit';
+							}
+						}
+					} else {
+						unset( $attachment_id );
+						unset( $attachment );
+					}
+				}
 
-					// Modify image tag if Photon function provides a URL
-					// Ensure changes are only applied to the current image by copying and modifying the matched tag, then replacing the entire tag with our modified version.
-					if ( $src !== $photon_url ) {
-						$new_tag = $tag;
+				// If image tag lacks width and height arguments, try to determine from strings WP appends to resized image filenames.
+				if ( false === $width && false === $height ) {
+					list( $width, $height ) = self::parse_dimensions_from_filename( $src );
+				}
 
-						// If present, replace the link href with a Photoned URL for the full-size image.
-						if ( ! empty( $images['link_url'][ $index ] ) && self::validate_image_url( $images['link_url'][ $index ] ) ) {
-							$new_tag = preg_replace( '#(href=["|\'])' . preg_quote( $images['link_url'][ $index ], '#' ) . '(["|\'])#i', '\1' . Image_CDN_Core::cdn_url( $images['link_url'][ $index ] ) . '\2', $new_tag, 1 );
-						}
+				$width_orig     = $width;
+				$height_orig    = $height;
+				$transform_orig = $transform;
 
-						// Supplant the original source value with our Photon URL.
-						$photon_url = esc_url( $photon_url );
-						$new_tag    = str_replace( $src_orig, $photon_url, $new_tag );
+				// If width is available, constrain to $content_width.
+				if ( false !== $width && is_numeric( $content_width ) && $width > $content_width ) {
+					if ( false !== $height ) {
+						$height = round( ( $content_width * $height ) / $width );
+					}
+					$width = $content_width;
+				}
 
-						// If Lazy Load is in use, pass placeholder image through Photon.
-						if ( isset( $placeholder_src ) && self::validate_image_url( $placeholder_src ) ) {
-							$placeholder_src = Image_CDN_Core::cdn_url( $placeholder_src );
+				// Set a width if none is found and $content_width is available.
+				// If width is set in this manner and height is available, use `fit` instead of `resize` to prevent skewing.
+				if ( false === $width && is_numeric( $content_width ) ) {
+					$width = (int) $content_width;
 
-							if ( $placeholder_src !== $placeholder_src_orig ) {
-								$new_tag = str_replace( $placeholder_src_orig, esc_url( $placeholder_src ), $new_tag );
-							}
+					if ( false !== $height ) {
+						$transform = 'fit';
+					}
+				}
 
-							unset( $placeholder_src );
-						}
+				// Detect if image source is for a custom-cropped thumbnail and prevent further URL manipulation.
+				if ( ! $fullsize_url && preg_match_all( '#-e[a-z0-9]+(-\d+x\d+)?\.(' . implode( '|', self::$extensions ) . '){1}$#i', basename( $src ), $filename ) ) {
+					$fullsize_url = true;
+				}
 
-						// If we are not transforming the image with resize, fit, or letterbox (lb), then we should remove
-						// the width and height arguments (including HTML4 percentages) from the image to prevent distortion.
-						// Even if $args['w'] and $args['h'] are present, Photon does not crop to those dimensions. Instead,
-						// it appears to favor height.
-						//
-						// If we are transforming the image via one of those methods, let's update the width and height attributes.
-						if ( empty( $args['resize'] ) && empty( $args['fit'] ) && empty( $args['lb'] ) ) {
-							$new_tag = preg_replace( '#(?<=\s)(width|height)=["\']?[\d%]+["\']?\s?#i', '', $new_tag );
-						} else {
-							$resize_args = isset( $args['resize'] ) ? $args['resize'] : false;
-							if ( false === $resize_args ) {
-								$resize_args = ( ! $resize_args && isset( $args['fit'] ) )
-									? $args['fit']
-									: false;
-							}
-							if ( false === $resize_args ) {
-								$resize_args = ( ! $resize_args && isset( $args['lb'] ) )
-									? $args['lb']
-									: false;
-							}
+				// Build URL, first maybe removing WP's resized string so we pass the original image to Photon.
+				if ( ! $fullsize_url && 0 === strpos( $src, $upload_dir['baseurl'] ) ) {
+					$src = self::strip_image_dimensions_maybe( $src );
+				}
+
+				// Build array of Photon args and expose to filter before passing to Photon URL function.
+				$args = array();
 
-							$resize_args = array_map( 'trim', explode( ',', $resize_args ) );
+				if ( false !== $width && false !== $height ) {
+					$args[ $transform ] = $width . ',' . $height;
+				} elseif ( false !== $width ) {
+					$args['w'] = $width;
+				} elseif ( false !== $height ) {
+					$args['h'] = $height;
+				}
 
-							// (?<=\s)        - Ensure width or height attribute is preceded by a space
-							// (width=["\']?) - Matches, and captures, width=, width=", or width='
-							// [\d%]+         - Matches 1 or more digits or percent signs
-							// (["\']?)       - Matches, and captures, ", ', or empty string
-							// \s             - Ensures there's a space after the attribute
-							$new_tag = preg_replace( '#(?<=\s)(width=["\']?)[\d%]+(["\']?)\s?#i', sprintf( '${1}%d${2} ', $resize_args[0] ), $new_tag );
-							$new_tag = preg_replace( '#(?<=\s)(height=["\']?)[\d%]+(["\']?)\s?#i', sprintf( '${1}%d${2} ', $resize_args[1] ), $new_tag );
+				/**
+				 * Filter the array of Photon arguments added to an image when it goes through Photon.
+				 * By default, only includes width and height values.
+				 *
+				 * @see https://developer.wordpress.com/docs/photon/api/
+				 *
+				 * @module photon
+				 *
+				 * @since 2.0.0
+				 * @since {VERSION NUMBER} Passes image tag name instead of full HTML of tag.
+				 *
+				 * @param array $args Array of Photon Arguments.
+				 * @param array $details {
+				 *     Array of image details.
+				 *
+				 *     @type string    $tag            Image tag (Image HTML output).
+				 *     @type string    $src            Image URL.
+				 *     @type string    $src_orig       Original Image URL.
+				 *     @type int|false $width          Image width.
+				 *     @type int|false $height         Image height.
+				 *     @type int|false $width_orig     Original image width before constrained by content_width.
+				 *     @type int|false $height_orig    Original Image height before constrained by content_width.
+				 *     @type string    $transform      Transform.
+				 *     @type string    $transform_orig Original transform before constrained by content_width.
+				 * }
+				 */
+				$args = apply_filters( 'jetpack_photon_post_image_args', $args, compact( 'tag', 'src', 'src_orig', 'width', 'height', 'width_orig', 'height_orig', 'transform', 'transform_orig' ) );
+
+				$photon_url = Image_CDN_Core::cdn_url( $src, $args );
+
+				// Modify image tag if Photon function provides a URL.
+				// The Tag Processor edits attributes in place, so only the currently-matched link and image are changed.
+				if ( $src !== $photon_url ) {
+					// If present, replace the link href with a Photoned URL for the full-size image.
+					if ( self::validate_image_url( $link_href ) && $p->seek( 'link' ) ) {
+						$p->set_attribute( 'href', Image_CDN_Core::cdn_url( $link_href ) );
+						$p->seek( 'image' );
+					}
+
+					// Write the Photon URL to whichever attribute supplied the real image URL:
+					// `src` normally, or the `data-lazy-*` attribute when Lazy Load is in use.
+					$p->set_attribute( $source_type, esc_url( $photon_url ) );
+
+					// If Lazy Load is in use, `src` holds the placeholder image; pass it through Photon.
+					if ( 'src' !== $source_type && isset( $placeholder_src ) && self::validate_image_url( $placeholder_src ) ) {
+						$placeholder_src = Image_CDN_Core::cdn_url( $placeholder_src );
+
+						if ( $placeholder_src !== $placeholder_src_orig ) {
+							$p->set_attribute( 'src', esc_url( $placeholder_src ) );
 						}
 
-						// Tag an image for dimension checking.
-						if ( ! self::is_amp_endpoint() ) {
-							$new_tag = preg_replace( '#(\s?/)?>(\s*</a>)?$#i', ' data-recalc-dims="1"\1>\2', $new_tag );
+						unset( $placeholder_src );
+					}
+
+					// If we are not transforming the image with resize, fit, or letterbox (lb), then we should remove
+					// the width and height arguments (including HTML4 percentages) from the image to prevent distortion.
+					// Even if $args['w'] and $args['h'] are present, Photon does not crop to those dimensions. Instead,
+					// it appears to favor height.
+					//
+					// If we are transforming the image via one of those methods, let's update the width and height attributes.
+					if ( empty( $args['resize'] ) && empty( $args['fit'] ) && empty( $args['lb'] ) ) {
+						$p->remove_attribute( 'width' );
+						$p->remove_attribute( 'height' );
+					} else {
+						$resize_args = isset( $args['resize'] ) ? $args['resize'] : false;
+						if ( false === $resize_args ) {
+							$resize_args = ( ! $resize_args && isset( $args['fit'] ) )
+								? $args['fit']
+								: false;
+						}
+						if ( false === $resize_args ) {
+							$resize_args = ( ! $resize_args && isset( $args['lb'] ) )
+								? $args['lb']
+								: false;
 						}
 
-						// Replace original tag with modified version.
-						$content = str_replace( $tag, $new_tag, $content );
+						$resize_args = is_array( $resize_args ) ? array_values( $resize_args ) : explode( ',', $resize_args );
+						$p->set_attribute( 'width', (string) (int) trim( $resize_args[0] ) );
+						$p->set_attribute( 'height', (string) (int) trim( isset( $resize_args[1] ) ? $resize_args[1] : '' ) );
 					}
-				} elseif ( preg_match( '#^http(s)?://i[\d]{1}.wp.com#', $src ) && ! empty( $images['link_url'][ $index ] ) && self::validate_image_url( $images['link_url'][ $index ] ) ) {
-					$new_tag = preg_replace( '#(href=["\'])' . preg_quote( $images['link_url'][ $index ], '#' ) . '(["\'])#i', '\1' . Image_CDN_Core::cdn_url( $images['link_url'][ $index ] ) . '\2', $tag, 1 );
 
-					$content = str_replace( $tag, $new_tag, $content );
+					// Tag an image for dimension checking.
+					if ( ! self::is_amp_endpoint() ) {
+						$p->set_attribute( 'data-recalc-dims', '1' );
+					}
 				}
+			} elseif ( preg_match( '#^http(s)?://i[\d]{1}.wp.com#', $src ) && self::validate_image_url( $link_href ) ) {
+				$p->seek( 'link' );
+				$p->set_attribute( 'href', Image_CDN_Core::cdn_url( $link_href ) );
+				$p->seek( 'image' );
 			}
 		}
 
-		return $content;
+		return $p->get_updated_html();
 	}
 
 	/**