Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

19849 og image tags are not showing the biggest available image when a resized version is used in the content #21534

Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions inc/class-wpseo-content-images.php
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,11 @@ public function get_images_from_content( $content ) {
}

$content_images = $this->get_img_tags_from_content( $content );
$images = array_map( [ $this, 'get_img_tag_source' ], $content_images );
$images = array_filter( $images );
$images = array_unique( $images );
$images = array_values( $images ); // Reset the array keys.

$images = array_map( [ $this, 'get_img_tag_source' ], $content_images );
$images = array_filter( $images );
$images = array_unique( $images );
$images = array_values( $images ); // Reset the array keys.
return $images;
}

Expand Down
184 changes: 17 additions & 167 deletions src/builders/indexable-link-builder.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,13 @@

namespace Yoast\WP\SEO\Builders;

use DOMDocument;
use WP_HTML_Tag_Processor;
use WPSEO_Image_Utils;
use Yoast\WP\SEO\Helpers\Image_Helper;
use Yoast\WP\SEO\Helpers\Indexable_Helper;
use Yoast\WP\SEO\Helpers\Options_Helper;
use Yoast\WP\SEO\Helpers\Post_Helper;
use Yoast\WP\SEO\Helpers\Url_Helper;
use Yoast\WP\SEO\Images\Application\Image_Content_Extractor;
use Yoast\WP\SEO\Models\Indexable;
use Yoast\WP\SEO\Models\SEO_Links;
use Yoast\WP\SEO\Repositories\Indexable_Repository;
Expand Down Expand Up @@ -69,6 +68,13 @@ class Indexable_Link_Builder {
*/
protected $indexable_repository;

/**
* Class that finds all images in a content string and extracts them.
*
* @var Image_Content_Extractor
*/
private $image_content_extractor;

/**
* Indexable_Link_Builder constructor.
*
Expand All @@ -83,13 +89,15 @@ public function __construct(
Url_Helper $url_helper,
Post_Helper $post_helper,
Options_Helper $options_helper,
Indexable_Helper $indexable_helper
Indexable_Helper $indexable_helper,
Image_Content_Extractor $image_content_extractor
) {
$this->seo_links_repository = $seo_links_repository;
$this->url_helper = $url_helper;
$this->post_helper = $post_helper;
$this->options_helper = $options_helper;
$this->indexable_helper = $indexable_helper;
$this->seo_links_repository = $seo_links_repository;
$this->url_helper = $url_helper;
$this->post_helper = $post_helper;
$this->options_helper = $options_helper;
$this->indexable_helper = $indexable_helper;
$this->image_content_extractor = $image_content_extractor;
}

/**
Expand Down Expand Up @@ -137,7 +145,7 @@ public function build( $indexable, $content ) {

$content = \str_replace( ']]>', ']]>', $content );
$links = $this->gather_links( $content );
$images = $this->gather_images( $content );
$images = $this->image_content_extractor->gather_images( $content );
leonidasmi marked this conversation as resolved.
Show resolved Hide resolved

if ( empty( $links ) && empty( $images ) ) {
$indexable->link_count = 0;
Expand Down Expand Up @@ -229,164 +237,6 @@ protected function gather_links( $content ) {
return $links;
}

/**
* Gathers all images from content with WP's WP_HTML_Tag_Processor() and returns them along with their IDs, if
* possible.
*
* @param string $content The content.
*
* @return int[] An associated array of image IDs, keyed by their URL.
*/
protected function gather_images_wp( $content ) {
$processor = new WP_HTML_Tag_Processor( $content );
$images = [];

$query = [
'tag_name' => 'img',
];

/**
* Filter 'wpseo_image_attribute_containing_id' - Allows filtering what attribute will be used to extract image IDs from.
*
* Defaults to "class", which is where WP natively stores the image IDs, in a `wp-image-<ID>` format.
*
* @api string The attribute to be used to extract image IDs from.
*/
$attribute = \apply_filters( 'wpseo_image_attribute_containing_id', 'class' );

while ( $processor->next_tag( $query ) ) {
$src = \htmlentities( $processor->get_attribute( 'src' ), ( \ENT_QUOTES | \ENT_SUBSTITUTE | \ENT_HTML401 ), \get_bloginfo( 'charset' ) );
$classes = $processor->get_attribute( $attribute );
$id = $this->extract_id_of_classes( $classes );

$images[ $src ] = $id;
}

return $images;
}

/**
* Gathers all images from content with DOMDocument() and returns them along with their IDs, if possible.
*
* @param string $content The content.
*
* @return int[] An associated array of image IDs, keyed by their URL.
*/
protected function gather_images_domdocument( $content ) {
$images = [];
$charset = \get_bloginfo( 'charset' );

/**
* Filter 'wpseo_image_attribute_containing_id' - Allows filtering what attribute will be used to extract image IDs from.
*
* Defaults to "class", which is where WP natively stores the image IDs, in a `wp-image-<ID>` format.
*
* @api string The attribute to be used to extract image IDs from.
*/
$attribute = \apply_filters( 'wpseo_image_attribute_containing_id', 'class' );

\libxml_use_internal_errors( true );
$post_dom = new DOMDocument();
$post_dom->loadHTML( '<?xml encoding="' . $charset . '">' . $content );
\libxml_clear_errors();

foreach ( $post_dom->getElementsByTagName( 'img' ) as $img ) {
$src = \htmlentities( $img->getAttribute( 'src' ), ( \ENT_QUOTES | \ENT_SUBSTITUTE | \ENT_HTML401 ), $charset );
$classes = $img->getAttribute( $attribute );
$id = $this->extract_id_of_classes( $classes );

$images[ $src ] = $id;
}

return $images;
}

/**
* Extracts image ID out of the image's classes.
*
* @param string $classes The classes assigned to the image.
*
* @return int The ID that's extracted from the classes.
*/
protected function extract_id_of_classes( $classes ) {
if ( ! $classes ) {
return 0;
}

/**
* Filter 'wpseo_extract_id_pattern' - Allows filtering the regex patern to be used to extract image IDs from class/attribute names.
*
* Defaults to the pattern that extracts image IDs from core's `wp-image-<ID>` native format in image classes.
*
* @api string The regex pattern to be used to extract image IDs from class names. Empty string if the whole class/attribute should be returned.
*/
$pattern = \apply_filters( 'wpseo_extract_id_pattern', '/(?<!\S)wp-image-(\d+)(?!\S)/i' );

if ( $pattern === '' ) {
return (int) $classes;
}

$matches = [];

if ( \preg_match( $pattern, $classes, $matches ) ) {
return (int) $matches[1];
}

return 0;
}

/**
* Gathers all images from content.
*
* @param string $content The content.
*
* @return int[] An associated array of image IDs, keyed by their URLs.
*/
protected function gather_images( $content ) {

/**
* Filter 'wpseo_force_creating_and_using_attachment_indexables' - Filters if we should use attachment indexables to find all content images. Instead of scanning the content.
*
* The default value is false.
*
* @since 21.1
*/
$should_not_parse_content = \apply_filters( 'wpseo_force_creating_and_using_attachment_indexables', false );

/**
* Filter 'wpseo_force_skip_image_content_parsing' - Filters if we should force skip scanning the content to parse images.
* This filter can be used if the regex gives a faster result than scanning the code.
*
* The default value is false.
*
* @since 21.1
*/
$should_not_parse_content = \apply_filters( 'wpseo_force_skip_image_content_parsing', $should_not_parse_content );
if ( ! $should_not_parse_content && \class_exists( WP_HTML_Tag_Processor::class ) ) {
return $this->gather_images_wp( $content );
}

if ( ! $should_not_parse_content && \class_exists( DOMDocument::class ) ) {
return $this->gather_images_DOMDocument( $content );
}

if ( \strpos( $content, 'src' ) === false ) {
// Nothing to do.
return [];
}

$images = [];
$regexp = '<img\s[^>]*src=("??)([^" >]*?)\\1[^>]*>';
// Used modifiers iU to match case insensitive and make greedy quantifiers lazy.
if ( \preg_match_all( "/$regexp/iU", $content, $matches, \PREG_SET_ORDER ) ) {
foreach ( $matches as $match ) {
$images[ $match[2] ] = 0;
}
}

return $images;
}

/**
* Creates link models from lists of URLs and image sources.
*
Expand Down
9 changes: 9 additions & 0 deletions src/builders/indexable-post-builder.php
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,15 @@ protected function find_alternative_image( Indexable $indexable ) {
];
}

$content_image = $this->image->get_post_content_image_id( $indexable->object_id );

if ( $content_image !== '' ) {
return [
'image_id' => $content_image,
'source' => 'first-content-image',
];
}

$content_image = $this->image->get_post_content_image( $indexable->object_id );
if ( $content_image ) {
return [
Expand Down
78 changes: 67 additions & 11 deletions src/helpers/image-helper.php
thijsoo marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
namespace Yoast\WP\SEO\Helpers;

use WPSEO_Image_Utils;
use Yoast\WP\SEO\Images\Application\Image_Content_Extractor;
use Yoast\WP\SEO\Models\SEO_Links;
use Yoast\WP\SEO\Repositories\Indexable_Repository;
use Yoast\WP\SEO\Repositories\SEO_Links_Repository;
Expand Down Expand Up @@ -40,6 +41,13 @@ class Image_Helper {
*/
protected $seo_links_repository;

/**
* The image content extractor.
*
* @var Image_Content_Extractor
*/
protected $image_content_extractor;

/**
* The options helper.
*
Expand All @@ -57,21 +65,24 @@ class Image_Helper {
/**
* Image_Helper constructor.
*
* @param Indexable_Repository $indexable_repository The indexable repository.
* @param SEO_Links_Repository $seo_links_repository The SEO Links repository.
* @param Options_Helper $options The options helper.
* @param Url_Helper $url_helper The URL helper.
* @param Indexable_Repository $indexable_repository The indexable repository.
* @param SEO_Links_Repository $seo_links_repository The SEO Links repository.
* @param Options_Helper $options The options helper.
* @param Url_Helper $url_helper The URL helper.
* @param Image_Content_Extractor $image_content_extractor The image content extractor.
*/
public function __construct(
Indexable_Repository $indexable_repository,
SEO_Links_Repository $seo_links_repository,
Options_Helper $options,
Url_Helper $url_helper
Url_Helper $url_helper,
Image_Content_Extractor $image_content_extractor
) {
$this->indexable_repository = $indexable_repository;
$this->seo_links_repository = $seo_links_repository;
$this->options_helper = $options;
$this->url_helper = $url_helper;
$this->indexable_repository = $indexable_repository;
$this->seo_links_repository = $seo_links_repository;
$this->options_helper = $options;
$this->url_helper = $url_helper;
$this->image_content_extractor = $image_content_extractor;
}

/**
Expand Down Expand Up @@ -150,7 +161,24 @@ public function get_featured_image_id( $post_id ) {
}

/**
* Gets the image url from the content.
* Gets the image ic from the content.
leonidasmi marked this conversation as resolved.
Show resolved Hide resolved
*
* @param int $post_id The post id to extract the images from.
*
* @return string The image url or an empty string when not found.
*/
public function get_post_content_image_id( $post_id ) {
thijsoo marked this conversation as resolved.
Show resolved Hide resolved
$image_url = $this->get_first_usable_content_image_id_for_post( $post_id );

if ( $image_url === null ) {
return '';
}

return $image_url;
}

/**
* Gets the image ic from the content.
*
* @param int $post_id The post id to extract the images from.
*
Expand Down Expand Up @@ -366,7 +394,8 @@ public function get_attachment_id_from_settings( $setting ) {
}

/**
* Based on and image ID return array with the best variation of that image. If it's not saved to the DB, save it to an option.
* Based on and image ID return array with the best variation of that image. If it's not saved to the DB, save it
* to an option.
*
* @param string $setting The setting name. Should be company or person.
*
Expand All @@ -389,6 +418,33 @@ public function get_attachment_meta_from_settings( $setting ) {
return $image_meta;
}

/**
* Retrieves the first usable content image_id for a post.
*
* @codeCoverageIgnore - We have to write test when this method contains own code.
*
* @param int $post_id The post id to extract the images from.
*
* @return string|null
*/
protected function get_first_usable_content_image_id_for_post( $post_id ) {
thijsoo marked this conversation as resolved.
Show resolved Hide resolved
// phpcs:ignore WordPress.WP.GlobalVariablesOverride.Prohibited -- To setup the post we need to do this explicitly.
global $post;
$post_backup = $post;
$post = \get_post( $post_id );

\setup_postdata( $post );
$content = \apply_filters( 'the_content', $post->post_content );
\wp_reset_postdata();
// phpcs:ignore WordPress.WP.GlobalVariablesOverride.Prohibited -- To setup the post we need to do this explicitly.
$post = $post_backup;

$content = \str_replace( ']]>', ']]&gt;', $content );
$images = $this->image_content_extractor->gather_images( $content );

return \array_shift( $images );
}

/**
* Retrieves the first usable content image for a post.
*
Expand Down
Loading
Loading