Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

19849 og image tags are not showing the biggest available image when a resized version is used in the content #21534

Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions inc/class-wpseo-content-images.php
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,11 @@ public function get_images_from_content( $content ) {
}

$content_images = $this->get_img_tags_from_content( $content );
$images = array_map( [ $this, 'get_img_tag_source' ], $content_images );
$images = array_filter( $images );
$images = array_unique( $images );
$images = array_values( $images ); // Reset the array keys.

$images = array_map( [ $this, 'get_img_tag_source' ], $content_images );
$images = array_filter( $images );
$images = array_unique( $images );
$images = array_values( $images ); // Reset the array keys.
return $images;
}

Expand Down
208 changes: 41 additions & 167 deletions src/builders/indexable-link-builder.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,13 @@

namespace Yoast\WP\SEO\Builders;

use DOMDocument;
use WP_HTML_Tag_Processor;
use WPSEO_Image_Utils;
use Yoast\WP\SEO\Helpers\Image_Helper;
use Yoast\WP\SEO\Helpers\Indexable_Helper;
use Yoast\WP\SEO\Helpers\Options_Helper;
use Yoast\WP\SEO\Helpers\Post_Helper;
use Yoast\WP\SEO\Helpers\Url_Helper;
use Yoast\WP\SEO\Images\Application\Image_Content_Extractor;
use Yoast\WP\SEO\Models\Indexable;
use Yoast\WP\SEO\Models\SEO_Links;
use Yoast\WP\SEO\Repositories\Indexable_Repository;
Expand Down Expand Up @@ -69,6 +68,13 @@ class Indexable_Link_Builder {
*/
protected $indexable_repository;

/**
* Class that finds all images in a content string and extracts them.
*
* @var Image_Content_Extractor
*/
private $image_content_extractor;

/**
* Indexable_Link_Builder constructor.
*
Expand All @@ -83,13 +89,15 @@ public function __construct(
Url_Helper $url_helper,
Post_Helper $post_helper,
Options_Helper $options_helper,
Indexable_Helper $indexable_helper
Indexable_Helper $indexable_helper,
Image_Content_Extractor $image_content_extractor
) {
$this->seo_links_repository = $seo_links_repository;
$this->url_helper = $url_helper;
$this->post_helper = $post_helper;
$this->options_helper = $options_helper;
$this->indexable_helper = $indexable_helper;
$this->seo_links_repository = $seo_links_repository;
$this->url_helper = $url_helper;
$this->post_helper = $post_helper;
$this->options_helper = $options_helper;
$this->indexable_helper = $indexable_helper;
$this->image_content_extractor = $image_content_extractor;
}

/**
Expand Down Expand Up @@ -137,7 +145,7 @@ public function build( $indexable, $content ) {

$content = \str_replace( ']]>', ']]>', $content );
$links = $this->gather_links( $content );
$images = $this->gather_images( $content );
$images = $this->image_content_extractor->gather_images( $content );
leonidasmi marked this conversation as resolved.
Show resolved Hide resolved

if ( empty( $links ) && empty( $images ) ) {
$indexable->link_count = 0;
Expand All @@ -146,6 +154,10 @@ public function build( $indexable, $content ) {
return [];
}

if ( ! empty( $images ) && $indexable->open_graph_image_source === 'first-content-image' ) {
$this->update_first_content_image( $indexable, $images );
thijsoo marked this conversation as resolved.
Show resolved Hide resolved
}

$links = $this->create_links( $indexable, $links, $images );

$this->update_related_indexables( $indexable, $links );
Expand Down Expand Up @@ -229,164 +241,6 @@ protected function gather_links( $content ) {
return $links;
}

/**
 * Gathers all images from content with WP's WP_HTML_Tag_Processor() and returns them along with their IDs, if
 * possible.
 *
 * Image tags without a usable (non-empty, string) `src` attribute are skipped, because they cannot be keyed
 * by URL.
 *
 * @param string $content The content.
 *
 * @return int[] An associated array of image IDs, keyed by their URL.
 */
protected function gather_images_wp( $content ) {
	$processor = new WP_HTML_Tag_Processor( $content );
	$images    = [];

	// The charset does not change between images, so look it up once instead of once per tag.
	$charset = \get_bloginfo( 'charset' );

	$query = [
		'tag_name' => 'img',
	];

	/**
	 * Filter 'wpseo_image_attribute_containing_id' - Allows filtering what attribute will be used to extract image IDs from.
	 *
	 * Defaults to "class", which is where WP natively stores the image IDs, in a `wp-image-<ID>` format.
	 *
	 * @api string The attribute to be used to extract image IDs from.
	 */
	$attribute = \apply_filters( 'wpseo_image_attribute_containing_id', 'class' );

	while ( $processor->next_tag( $query ) ) {
		$src_raw = $processor->get_attribute( 'src' );

		// A missing or value-less `src` does not come back as a string; an empty one is not a URL either.
		if ( ! \is_string( $src_raw ) || $src_raw === '' ) {
			continue;
		}

		$src = \htmlentities( $src_raw, ( \ENT_QUOTES | \ENT_SUBSTITUTE | \ENT_HTML401 ), $charset );

		// Only hand real strings to the ID extractor; a value-less attribute must not be cast to an ID.
		$classes = $processor->get_attribute( $attribute );
		$id      = $this->extract_id_of_classes( \is_string( $classes ) ? $classes : '' );

		$images[ $src ] = $id;
	}

	return $images;
}

/**
 * Gathers all images from content with DOMDocument() and returns them along with their IDs, if possible.
 *
 * Image tags with an empty or missing `src` attribute are skipped, because they cannot be keyed by URL.
 * The libxml error-handling mode that was active before the call is restored afterwards.
 *
 * @param string $content The content.
 *
 * @return int[] An associated array of image IDs, keyed by their URL.
 */
protected function gather_images_domdocument( $content ) {
	$images  = [];
	$charset = \get_bloginfo( 'charset' );

	/**
	 * Filter 'wpseo_image_attribute_containing_id' - Allows filtering what attribute will be used to extract image IDs from.
	 *
	 * Defaults to "class", which is where WP natively stores the image IDs, in a `wp-image-<ID>` format.
	 *
	 * @api string The attribute to be used to extract image IDs from.
	 */
	$attribute = \apply_filters( 'wpseo_image_attribute_containing_id', 'class' );

	// Collect parse errors internally instead of emitting warnings, remembering the previous mode
	// so this method does not leave a changed global libxml setting behind.
	$previous_libxml_mode = \libxml_use_internal_errors( true );

	$post_dom = new DOMDocument();
	// The XML encoding prefix makes loadHTML() read the content in the site's charset.
	$post_dom->loadHTML( '<?xml encoding="' . $charset . '">' . $content );

	\libxml_clear_errors();
	\libxml_use_internal_errors( $previous_libxml_mode );

	foreach ( $post_dom->getElementsByTagName( 'img' ) as $img ) {
		$src_raw = $img->getAttribute( 'src' );

		// getAttribute() returns an empty string when the attribute is absent; that is not a usable URL.
		if ( $src_raw === '' ) {
			continue;
		}

		$src     = \htmlentities( $src_raw, ( \ENT_QUOTES | \ENT_SUBSTITUTE | \ENT_HTML401 ), $charset );
		$classes = $img->getAttribute( $attribute );
		$id      = $this->extract_id_of_classes( $classes );

		$images[ $src ] = $id;
	}

	return $images;
}

/**
 * Derives an image ID from the value of the attribute that carries it (the image's classes by default).
 *
 * @param string $classes The classes assigned to the image.
 *
 * @return int The extracted ID, or 0 when no ID could be extracted.
 */
protected function extract_id_of_classes( $classes ) {
	// Nothing to inspect means there is no ID.
	if ( ! $classes ) {
		return 0;
	}

	/**
	 * Filter 'wpseo_extract_id_pattern' - Allows filtering the regex pattern to be used to extract image IDs from class/attribute names.
	 *
	 * Defaults to the pattern that extracts image IDs from core's `wp-image-<ID>` native format in image classes.
	 *
	 * @api string The regex pattern to be used to extract image IDs from class names. Empty string if the whole class/attribute should be returned.
	 */
	$id_pattern = \apply_filters( 'wpseo_extract_id_pattern', '/(?<!\S)wp-image-(\d+)(?!\S)/i' );

	// An empty pattern signals that the whole attribute value is the ID.
	if ( $id_pattern === '' ) {
		return (int) $classes;
	}

	$found = [];

	// The first capture group of the pattern holds the ID; anything but a single match yields 0.
	return ( \preg_match( $id_pattern, $classes, $found ) === 1 ) ? (int) $found[1] : 0;
}

/**
 * Gathers all images from content.
 *
 * Uses the first available strategy, in this order: WP_HTML_Tag_Processor, DOMDocument, and finally a
 * regular expression. The two parsers are bypassed when one of the filters below asks to skip content
 * parsing. The regular-expression fallback cannot determine image IDs, so it reports 0 for every image.
 *
 * @param string $content The content.
 *
 * @return int[] An associated array of image IDs, keyed by their URLs.
 */
protected function gather_images( $content ) {

	/**
	 * Filter 'wpseo_force_creating_and_using_attachment_indexables' - Filters if we should use attachment indexables to find all content images. Instead of scanning the content.
	 *
	 * The default value is false.
	 *
	 * @since 21.1
	 */
	$should_not_parse_content = \apply_filters( 'wpseo_force_creating_and_using_attachment_indexables', false );

	/**
	 * Filter 'wpseo_force_skip_image_content_parsing' - Filters if we should force skip scanning the content to parse images.
	 * This filter can be used if the regex gives a faster result than scanning the code.
	 *
	 * The default value is false.
	 *
	 * @since 21.1
	 */
	$should_not_parse_content = \apply_filters( 'wpseo_force_skip_image_content_parsing', $should_not_parse_content );

	if ( ! $should_not_parse_content ) {
		if ( \class_exists( WP_HTML_Tag_Processor::class ) ) {
			return $this->gather_images_wp( $content );
		}

		if ( \class_exists( DOMDocument::class ) ) {
			// Call the method by its declared (lowercase) name rather than relying on case-insensitive lookup.
			return $this->gather_images_domdocument( $content );
		}
	}

	if ( \strpos( $content, 'src' ) === false ) {
		// Nothing to do.
		return [];
	}

	$images = [];
	$regexp = '<img\s[^>]*src=("??)([^" >]*?)\\1[^>]*>';
	// Used modifiers iU to match case insensitive and make greedy quantifiers lazy.
	if ( \preg_match_all( "/$regexp/iU", $content, $matches, \PREG_SET_ORDER ) ) {
		foreach ( $matches as $match ) {
			// An empty src is not an image URL, so do not register it.
			if ( $match[2] === '' ) {
				continue;
			}

			$images[ $match[2] ] = 0;
		}
	}

	return $images;
}

/**
* Creates link models from lists of URLs and image sources.
*
Expand Down Expand Up @@ -729,4 +583,24 @@ protected function update_incoming_links_for_related_indexables( $related_indexa
$this->indexable_repository->update_incoming_link_count( $count['target_indexable_id'], $count['incoming'] );
}
}

/**
 * Updates the image ids when the indexable images are marked as first content image.
 *
 * The Open Graph and Twitter image IDs are only overwritten when the indexable's current Open Graph image
 * URL is identical to the URL of the first image in the given list. An empty list leaves the indexable
 * untouched.
 *
 * @param Indexable          $indexable The indexable to change.
 * @param array<string, int> $images    The image IDs, keyed by image URL, in content order.
 *
 * @return void
 */
public function update_first_content_image( Indexable $indexable, array $images ): void {
	// Without images there is no first image; bail out so no placeholder value is ever written as an ID.
	if ( $images === [] ) {
		return;
	}

	// key() and current() read the array's internal pointer, so rewind it to be sure we look at the first entry.
	\reset( $images );
	$first_content_image_url = \key( $images );
	$first_content_image_id  = \current( $images );

	if ( $indexable->open_graph_image === $first_content_image_url ) {
		$indexable->open_graph_image_id = $first_content_image_id;
		$indexable->twitter_image_id    = $first_content_image_id;
	}
}
}
Loading
Loading