Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HTML API: Return elements pushed and popped rather than tags read. #6348

Closed
wants to merge 10 commits into from
76 changes: 60 additions & 16 deletions src/wp-includes/html-api/class-wp-html-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -366,13 +366,15 @@
* @todo Support matching the class name and tag name.
*
* @since 6.4.0
* @since 6.6.0 Visits all tokens, including virtual ones.
*
* @throws Exception When unable to allocate a bookmark for the next token in the input HTML document.
*
* @param array|string|null $query {
* Optional. Which tag name to find, having which class, etc. Default is to find any tag.
*
* @type string|null $tag_name Which tag to find, or `null` for "any tag."
* @type string $tag_closers 'visit' to pause at tag closers, 'skip' or unset to only visit openers.
* @type int|null $match_offset Find the Nth tag matching all search criteria.
* 1 for "first" tag, 3 for "third," etc.
* Defaults to first tag.
Expand All @@ -383,13 +385,15 @@
* @return bool Whether a tag was matched.
*/
public function next_tag( $query = null ) {
$visit_closers = isset( $query['tag_closers'] ) && 'visit' === $query['tag_closers'];

if ( null === $query ) {
while ( $this->step() ) {
while ( $this->next_token() ) {
if ( '#tag' !== $this->get_token_type() ) {
continue;
}

if ( ! parent::is_tag_closer() ) {
if ( ! $this::is_tag_closer() || $visit_closers ) {
return true;
}
}
Expand All @@ -415,7 +419,7 @@
: null;

if ( ! ( array_key_exists( 'breadcrumbs', $query ) && is_array( $query['breadcrumbs'] ) ) ) {
while ( $this->step() ) {
while ( $this->next_token() ) {
if ( '#tag' !== $this->get_token_type() ) {
continue;
}
Expand All @@ -424,28 +428,19 @@
continue;
}

if ( ! parent::is_tag_closer() ) {
if ( ! parent::is_tag_closer() || $visit_closers ) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this also use $this?

Suggested change
if ( ! parent::is_tag_closer() || $visit_closers ) {
if ( ! $this->is_tag_closer() || $visit_closers ) {

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. Can you concoct a test case to fail for it? If not, don't worry - we can just fix it.

return true;
}
}

return false;
}

if ( isset( $query['tag_closers'] ) && 'visit' === $query['tag_closers'] ) {
_doing_it_wrong(
__METHOD__,
__( 'Cannot visit tag closers in HTML Processor.' ),
'6.4.0'
);
return false;
}

$breadcrumbs = $query['breadcrumbs'];
$match_offset = isset( $query['match_offset'] ) ? (int) $query['match_offset'] : 1;

while ( $match_offset > 0 && $this->step() ) {
if ( '#tag' !== $this->get_token_type() ) {
while ( $match_offset > 0 && $this->next_token() ) {
if ( '#tag' !== $this->get_token_type() || $this->is_tag_closer() ) {
continue;
}

Expand Down Expand Up @@ -478,6 +473,10 @@
public function next_token() {
$this->current_element = null;

if ( isset( $this->last_error ) ) {
return false;
}

if ( 0 === count( $this->element_queue ) && ! $this->step() ) {
while ( $this->state->stack_of_open_elements->pop() ) {
continue;
Expand Down Expand Up @@ -1432,6 +1431,47 @@
return parent::get_token_type();
}

/**
 * Retrieves the value of the named attribute from the currently-matched tag opener.
 *
 * The lookup is only delegated to the parent implementation when every check
 * below passes; otherwise `null` is returned:
 *
 *  - a current element must be set,
 *  - the current element must not be a POP stack event, since closing
 *    tokens cannot contain attributes,
 *  - the first byte of the node name must fall within 'A'-'Z', since only
 *    tags can contain attributes,
 *  - the element's token bookmark name must equal the string form of the
 *    processor's bookmark counter.
 *
 * NOTE(review): the bookmark comparison presumably separates tokens that exist
 * at the current position in the input from virtual ones - confirm against
 * where bookmark names are assigned.
 *
 * Example:
 *
 *     $p = WP_HTML_Processor::create_fragment( '<div enabled class="test" data-test-id="14">Test</div>' );
 *     $p->next_token() === true;
 *     $p->get_attribute( 'data-test-id' ) === '14';
 *     $p->get_attribute( 'enabled' ) === true;
 *     $p->get_attribute( 'aria-label' ) === null;
 *
 *     $p->next_tag() === false;
 *     $p->get_attribute( 'class' ) === null;
 *
 * @since 6.6.0 Subclassed for HTML Processor.
 *
 * @param string $name Name of attribute whose value is requested.
 * @return string|true|null Value of attribute or `null` if not available. Boolean attributes return `true`.
 */
public function get_attribute( $name ) {
	// Without a current element there is nothing to read attributes from.
	if ( ! isset( $this->current_element ) ) {
		return null;
	}

	$stack_event = $this->current_element;

	// Closing tokens cannot contain attributes.
	if ( WP_HTML_Stack_Event::POP === $stack_event->operation ) {
		return null;
	}

	// Only tags can contain attributes; anything not starting with 'A'-'Z' is rejected.
	$leading_byte = $stack_event->token->node_name[0];
	if ( $leading_byte < 'A' || $leading_byte > 'Z' ) {
		return null;
	}

	// Defer to the parent only when the token's bookmark matches the current counter.
	$matches_current_bookmark = (string) $this->bookmark_counter === $stack_event->token->bookmark_name;

	return $matches_current_bookmark ? parent::get_attribute( $name ) : null;
}

public function get_attribute_names_with_prefix( $prefix ) {
if ( isset( $this->current_element ) ) {
if ( WP_HTML_Stack_Event::POP === $this->current_element->operation ) {
Expand Down Expand Up @@ -1503,6 +1543,7 @@
? $this->bookmarks[ $this->state->current_token->bookmark_name ]->start
: 0;
$bookmark_starts_at = $this->bookmarks[ $actual_bookmark_name ]->start;
$bookmark_length = $this->bookmarks[ $actual_bookmark_name ]->length;
$direction = $bookmark_starts_at > $processor_started_at ? 'forward' : 'backward';

/*
Expand Down Expand Up @@ -1567,8 +1608,11 @@
return true;
}

while ( $this->step() ) {
while ( $this->next_token() ) {
if ( $bookmark_starts_at === $this->bookmarks[ $this->state->current_token->bookmark_name ]->start ) {
while ( isset( $this->current_element ) && $this->current_element->operation === WP_HTML_Stack_Event::POP ) {

Check failure on line 1613 in src/wp-includes/html-api/class-wp-html-processor.php

View workflow job for this annotation

GitHub Actions / PHP coding standards / Run coding standards checks

Use Yoda Condition checks, you must.
$this->current_element = array_shift( $this->element_queue );
}
return true;
}
}
Expand Down
14 changes: 10 additions & 4 deletions tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php
Original file line number Diff line number Diff line change
Expand Up @@ -231,12 +231,18 @@ public static function data_unsupported_elements() {
public function test_fails_when_encountering_unsupported_markup( $html, $description ) {
$processor = WP_HTML_Processor::create_fragment( $html );

while ( $processor->step() && null === $processor->get_attribute( 'supported' ) ) {
while ( $processor->next_token() && null === $processor->get_attribute( 'supported' ) ) {
continue;
}

$this->assertNull(
$processor->get_last_error(),
'Bailed on unsupported input before finding supported checkpoint: check test code.'
);

$this->assertTrue( $processor->get_attribute( 'supported' ), 'Did not find required supported element.' );
$this->assertFalse( $processor->step(), "Didn't properly reject unsupported markup: {$description}" );
$processor->next_token();
$this->assertNotNull( $processor->get_last_error(), "Didn't properly reject unsupported markup: {$description}" );
}

/**
Expand All @@ -247,7 +253,7 @@ public function test_fails_when_encountering_unsupported_markup( $html, $descrip
public static function data_unsupported_markup() {
return array(
'A with formatting following unclosed A' => array(
'<a><strong>Click <a supported><big unsupported>Here</big></a></strong></a>',
'<a><strong>Click <span supported><a unsupported><big>Here</big></a></strong></a>',
'Unclosed formatting requires complicated reconstruction.',
),

Expand Down Expand Up @@ -325,7 +331,7 @@ public static function data_html_target_with_breadcrumbs() {
'IMG after invalid DIV closer' => array( '</div><img target>', array( 'HTML', 'BODY', 'IMG' ), 1 ),
'EM inside DIV' => array( '<div>The weather is <em target>beautiful</em>.</div>', array( 'HTML', 'BODY', 'DIV', 'EM' ), 1 ),
'EM after closed EM' => array( '<em></em><em target></em>', array( 'HTML', 'BODY', 'EM' ), 2 ),
'EM after closed EMs' => array( '<em></em><em><em></em></em><em></em><em></em><em target></em>', array( 'HTML', 'BODY', 'EM' ), 6 ),
'EM after closed EMs' => array( '<em></em><em><em></em></em><em></em><em></em><em target></em>', array( 'HTML', 'BODY', 'EM' ), 5 ),
'EM after unclosed EM' => array( '<em><em target></em>', array( 'HTML', 'BODY', 'EM', 'EM' ), 1 ),
'EM after unclosed EM after DIV' => array( '<em><div><em target>', array( 'HTML', 'BODY', 'EM', 'DIV', 'EM' ), 1 ),
// This should work for all formatting elements, but if two work, the others probably do too.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -387,7 +387,7 @@ public function test_in_body_any_other_end_tag_with_unclosed_non_special_element
$this->assertSame( 'CODE', $processor->get_tag(), "Expected to start test on CODE element but found {$processor->get_tag()} instead." );
$this->assertSame( array( 'HTML', 'BODY', 'DIV', 'SPAN', 'CODE' ), $processor->get_breadcrumbs(), 'Failed to produce expected DOM nesting.' );

$this->assertTrue( $processor->step(), 'Failed to advance past CODE tag to expected SPAN closer.' );
$this->assertTrue( $processor->next_token(), 'Failed to advance past CODE tag to expected SPAN closer.' );
$this->assertTrue( $processor->is_tag_closer(), 'Expected to find closing SPAN, but found opener instead.' );
$this->assertSame( array( 'HTML', 'BODY', 'DIV' ), $processor->get_breadcrumbs(), 'Failed to advance past CODE tag to expected DIV opener.' );

Expand Down
Loading