From a9afdcc70d31caf68c8d2f9eaf07038091e4a1f5 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Tue, 24 Jan 2023 00:06:52 -0700 Subject: [PATCH] Revert behavioral change, refactor a few things - Presereed the original behavior when class-builder methods and attribute-setters for the `class` attribute coincide. - Renamed the enqueued-getter to `get_enqueued_attribute_value()` to focus on its semantic purpose instead of the implementation steps. - Un-doc-blocks some accidental docblock comments. - Reworded language use in a comment and removed "OTOH" as an unexplained abbreviation. --- .../html/class-wp-html-tag-processor.php | 150 +++++++++++------- phpunit/html/wp-html-tag-processor-test.php | 42 +++-- 2 files changed, 129 insertions(+), 63 deletions(-) diff --git a/lib/experimental/html/class-wp-html-tag-processor.php b/lib/experimental/html/class-wp-html-tag-processor.php index fa842027f25853..433c16a150806c 100644 --- a/lib/experimental/html/class-wp-html-tag-processor.php +++ b/lib/experimental/html/class-wp-html-tag-processor.php @@ -1059,7 +1059,7 @@ private function parse_next_attribute() { return true; } - /** + /* * > There must never be two or more attributes on * > the same start tag whose names are an ASCII * > case-insensitive match for each other. @@ -1127,18 +1127,20 @@ private function class_name_updates_to_attributes_updates() { return; } - if ( isset( $this->lexical_updates['class'] ) ) { - $existing_class = $this->extract_attribute_value_from_lexical_update( 'class' ); - if ( true === $existing_class ) { - $existing_class = ''; - } - } elseif ( isset( $this->attributes['class'] ) ) { + $existing_class = $this->get_enqueued_attribute_value( 'class' ); + if ( null === $existing_class || true === $existing_class ) { + $existing_class = ''; + } + + if ( false === $existing_class && isset( $this->attributes['class'] ) ) { $existing_class = substr( $this->html, $this->attributes['class']->value_starts_at, $this->attributes['class']->value_length ); - } else { + } + + if ( false === $existing_class ) { $existing_class = ''; } @@ -1258,7 +1260,7 @@ private function apply_attributes_updates() { return; } - /** + /* * Attribute updates can be enqueued in any order but as we * progress through the document to replace them we have to * make our replacements in the order in which they are found @@ -1277,7 +1279,7 @@ private function apply_attributes_updates() { } foreach ( $this->bookmarks as $bookmark ) { - /** + /* * As we loop through $this->lexical_updates, we keep comparing * $bookmark->start and $bookmark->end to $diff->start. We can't * change it and still expect the correct result, so let's accumulate @@ -1378,50 +1380,66 @@ private static function sort_start_ascending( $a, $b ) { } /** - * Given an attribute name, return its value as set in $lexical_updates. + * Return the enqueued value for a given attribute, if one exists. + * + * Enqueued updates can take different data types: + * - If an update is enqueued and is boolean, the return will be `true` + * - If an update is otherwise enqueued, the return will be the string value of that update. + * - If an attribute is enqueued to be removed, the return will be `null` to indicate that. + * - If no updates are enqueued, the return will be `false` to differentiate from "removed." * * @since 6.2.0 * - * @param string $name The attribute name. - * @return string|true|null Value of attribute or `null` if not available. - * Boolean attributes return `true`. + * @param string $comparable_name The attribute name in its comparable form. + * @return string|boolean|null Value of enqueued update if present, otherwise false. */ - private function extract_attribute_value_from_lexical_update( $name ) { - $comparable = strtolower( trim( $name ) ); - - if ( ! isset( $this->lexical_updates[ $comparable ] ) ) { - return null; + private function get_enqueued_attribute_value( $comparable_name ) { + if ( ! isset( $this->lexical_updates[ $comparable_name ] ) ) { + return false; } - $attribute = trim( $this->lexical_updates[ $comparable ]->text ); - // Has the attribute been removed? - if ( '' === $attribute ) { + $enqueued_text = $this->lexical_updates[ $comparable_name ]->text; + + // Removed attributes erase the entire span. + if ( '' === $enqueued_text ) { return null; } - /** - * When the lexical update contains just the attribute name, - * the value becomes a boolean true. For example: - * - * ```php - * $p = new WP_HTML_Tag_Processor(''); - * $p->set_attribute('checked', true); - * // The lexical update contains just the string "checked" + /* + * Boolean attribute updates are just the attribute name without a corresponding value. * - * $p->get_attribute('checked'); - * // returns true thanks to the condition below + * This value might differ from the given comparable name in that there could be leading + * or trailing whitespace, and that the casing follows the name given in `set_attribute`. * - * echo $p->get_updated_html(); - * // + * Example: + * ``` + * $p->set_attribute( 'data-TEST-id', 'update' ); + * 'update' === $p->get_enqueued_attribute_value( 'data-test-id' ); * ``` + * + * Here we detect this based on the absence of the `=`, which _must_ exist in any + * attribute containing a value, e.g. ``. + * ¹ ² + * 1. Attribute with a string value. + * 2. Boolean attribute whose value is `true`. */ - if ( $attribute === $comparable ) { + $equals_at = strpos( $enqueued_text, '=' ); + if ( false === $equals_at ) { return true; } - // $attribute is a 'name="value"' string, so we extract the value. - $value = substr( $attribute, strlen( $comparable ) + 2, -1 ); - return html_entity_decode( $value ); + /* + * Finally, a normal update's value will appear after the `=` and + * be double-quoted, as performed incidentally by `set_attribute`. + * + * e.g. `type="text"` + * ¹² ³ + * 1. Equals is here. + * 2. Double-quoting starts one after the equals sign. + * 3. Double-quoting ends at the last character in the update. + */ + $enqueued_value = substr( $enqueued_text, $equals_at + 2, -1 ); + return html_entity_decode( $enqueued_value ); } /** @@ -1452,21 +1470,23 @@ public function get_attribute( $name ) { $comparable = strtolower( $name ); - /** - * For $this->lexical_updates (below), it makes sense (and is easy enough) to only - * evaluate the updates for the attribute we're interested in (if any) and to ignore - * updates for all other attributes. For class name updates OTOH, we would need to - * replicate most of the logic from `class_name_updates_to_attributes_updates` here, - * so we might as well just call that function and have classname updates "promoted" - * to attribute updates. + /* + * For every attribute other than `class` we can perform a quick check if there's an + * enqueued lexical update whose value we should prefer over what's in the input HTML. + * + * The `class` attribute is special though because we expose the helpers `add_class` + * and `remove_class` which form a builder for the `class` attribute, so we have to + * additionally check if there are any enqueued class changes. If there are, we need + * to first flush them out so can report the full string value of the attribute. */ if ( 'class' === $name ) { $this->class_name_updates_to_attributes_updates(); } // If we have an update for this attribute, return the updated value. - if ( isset( $this->lexical_updates[ $comparable ] ) ) { - return $this->extract_attribute_value_from_lexical_update( $comparable ); + $enqueued_value = $this->get_enqueued_attribute_value( $comparable ); + if ( false !== $enqueued_value ) { + return $enqueued_value; } if ( ! isset( $this->attributes[ $comparable ] ) ) { @@ -1475,6 +1495,17 @@ public function get_attribute( $name ) { $attribute = $this->attributes[ $comparable ]; + /* + * This flag distinguishes an attribute with no value + * from an attribute with an empty string value. For + * unquoted attributes this could look very similar. + * It refers to whether an `=` follows the name. + * + * e.g.
+ * ¹ ² + * 1. Attribute `boolean-attribute` is `true`. + * 2. Attribute `empty-attribute` is `""`. + */ if ( true === $attribute->is_true ) { return true; } @@ -1654,7 +1685,7 @@ public function set_attribute( $name, $value ) { $updated_attribute = "{$name}=\"{$escaped_new_value}\""; } - /** + /* * > There must never be two or more attributes on * > the same start tag whose names are an ASCII * > case-insensitive match for each other. @@ -1700,6 +1731,14 @@ public function set_attribute( $name, $value ) { ' ' . $updated_attribute ); } + + /* + * Any calls to update the `class` attribute directly should wipe out any + * enqueued class changes from `add_class` and `remove_class`. + */ + if ( 'class' === $comparable_name && ! empty( $this->classname_updates ) ) { + $this->classname_updates = array(); + } } /** @@ -1714,7 +1753,7 @@ public function remove_attribute( $name ) { return false; } - /** + /* * > There must never be two or more attributes on * > the same start tag whose names are an ASCII * > case-insensitive match for each other. @@ -1724,14 +1763,19 @@ public function remove_attribute( $name ) { */ $name = strtolower( $name ); + /* + * Any calls to update the `class` attribute directly should wipe out any + * enqueued class changes from `add_class` and `remove_class`. + */ + if ( 'class' === $name && count( $this->classname_updates ) !== 0 ) { + $this->classname_updates = array(); + } + + // If we updated an attribute we didn't originally have, remove the enqueued update and move on. if ( ! isset( $this->attributes[ $name ] ) ) { - if ( 'class' === $name && count( $this->classname_updates ) !== 0 ) { - $this->classname_updates = array(); - } if ( isset( $this->lexical_updates[ $name ] ) ) { unset( $this->lexical_updates[ $name ] ); } - return false; } diff --git a/phpunit/html/wp-html-tag-processor-test.php b/phpunit/html/wp-html-tag-processor-test.php index f3a1f8f647b6c5..4df23c8eac4a8b 100644 --- a/phpunit/html/wp-html-tag-processor-test.php +++ b/phpunit/html/wp-html-tag-processor-test.php @@ -557,6 +557,22 @@ public function test_get_attribute_returns_updated_values_before_they_are_update ); } + public function test_get_attribute_returns_updated_values_before_they_are_updated_with_different_name_casing() { + $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); + $p->next_tag(); + $p->set_attribute( 'test-ATTribute', 'test-value' ); + $this->assertSame( + 'test-value', + $p->get_attribute( 'test-attribute' ), + 'get_attribute() (called before get_updated_html()) did not return attribute added via set_attribute()' + ); + $this->assertSame( + '
Text
', + $p->get_updated_html(), + 'Updated HTML does not include attribute added via set_attribute()' + ); + } + /** * @ticket 56299 * @@ -1044,10 +1060,12 @@ public function test_removing_all_classes_removes_the_existing_class_attribute_f } /** - * When both set_attribute( 'class', $value ) and add_class( $different_value ) are called, the + * When add_class( $different_value ) is called _after_ set_attribute( 'class', $value ), the * final class name should be "$value $different_value". In other words, the `add_class` call - * should append its class to the one(s) set by `set_attribute`. This holds regardless of the - * order in which these methods are called. + * should append its class to the one(s) set by `set_attribute`. When `add_class( $different_value )` + * is called _before_ `set_attribute( 'class', $value )`, however, the final class name should be + * "$value" instead, as any direct updates to the `class` attribute supersede any changes enqueued + * via the class builder methods. * * @ticket 56299 * @@ -1062,12 +1080,12 @@ public function test_set_attribute_takes_priority_over_add_class() { $p->add_class( 'add_class' ); $p->set_attribute( 'class', 'set_attribute' ); $this->assertSame( - '
Text
', + '
Text
', $p->get_updated_html(), "Calling get_updated_html after updating first tag's attributes did not return the expected HTML" ); $this->assertSame( - 'set_attribute add_class', + 'set_attribute', $p->get_attribute( 'class' ), "Calling get_attribute after updating first tag's attributes did not return the expected class name" ); @@ -1089,10 +1107,14 @@ public function test_set_attribute_takes_priority_over_add_class() { } /** - * When both set_attribute( 'class', $value ) and add_class( $different_value ) are called, the + * When add_class( $different_value ) is called _after_ set_attribute( 'class', $value ), the * final class name should be "$value $different_value". In other words, the `add_class` call - * should append its class to the one(s) set by `set_attribute`. This holds regardless of the - * order in which these methods are called, and even before `get_updated_html` is called. + * should append its class to the one(s) set by `set_attribute`. When `add_class( $different_value )` + * is called _before_ `set_attribute( 'class', $value )`, however, the final class name should be + * "$value" instead, as any direct updates to the `class` attribute supersede any changes enqueued + * via the class builder methods. + * + * This is still true if we read enqueued updates before calling `get_updated_html()`. * * @ticket 56299 * @@ -1107,12 +1129,12 @@ public function test_set_attribute_takes_priority_over_add_class_even_before_upd $p->add_class( 'add_class' ); $p->set_attribute( 'class', 'set_attribute' ); $this->assertSame( - 'set_attribute add_class', + 'set_attribute', $p->get_attribute( 'class' ), "Calling get_attribute after updating first tag's attributes did not return the expected class name" ); $this->assertSame( - '
Text
', + '
Text
', $p->get_updated_html(), "Calling get_updated_html after updating first tag's attributes did not return the expected HTML" );