From b21189e96f0b7f6fd4839a6a8dc0dcf856472690 Mon Sep 17 00:00:00 2001 From: jonsurrell Date: Wed, 2 Jul 2025 12:35:28 +0000 Subject: [PATCH] Docs: Expand valid_unicode function documentation. The `valid_unicode()` function accepts a limited set of codepoints according to the XML specification. Document the allowed codepoints and link to relevant documentation. Developed in https://github.com/WordPress/wordpress-develop/pull/9100. Props jonsurrell, dmsnell. See #6583, #63166. Built from https://develop.svn.wordpress.org/trunk@60405 git-svn-id: http://core.svn.wordpress.org/trunk@59741 1a063a9b-81f0-0310-95a4-ce76da25c4cd --- wp-includes/kses.php | 28 ++++++++++++++++++++++++---- wp-includes/version.php | 2 +- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/wp-includes/kses.php b/wp-includes/kses.php index ebb4a761b1..28bbce222a 100644 --- a/wp-includes/kses.php +++ b/wp-includes/kses.php @@ -2083,18 +2083,38 @@ function wp_kses_normalize_entities3( $matches ) { /** * Determines if a Unicode codepoint is valid. * + * The definition of a valid Unicode codepoint is taken from the XML definition: + * + * > Characters + * > + * > … + * > Legal characters are tab, carriage return, line feed, and the legal characters of + * > Unicode and ISO/IEC 10646. + * > … + * > Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] + * * @since 2.7.0 * + * @see https://www.w3.org/TR/xml/#charsets + * * @param int $i Unicode codepoint. * @return bool Whether or not the codepoint is a valid Unicode codepoint. */ function valid_unicode( $i ) { $i = (int) $i; - return ( 0x9 === $i || 0xa === $i || 0xd === $i || - ( 0x20 <= $i && $i <= 0xd7ff ) || - ( 0xe000 <= $i && $i <= 0xfffd ) || - ( 0x10000 <= $i && $i <= 0x10ffff ) + return ( + 0x9 === $i || // U+0009 HORIZONTAL TABULATION (HT) + 0xA === $i || // U+000A LINE FEED (LF) + 0xD === $i || // U+000D CARRIAGE RETURN (CR) + /* + * The valid Unicode characters according to the XML specification: + * + * > any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. + */ + ( 0x20 <= $i && $i <= 0xD7FF ) || + ( 0xE000 <= $i && $i <= 0xFFFD ) || + ( 0x10000 <= $i && $i <= 0x10FFFF ) ); } diff --git a/wp-includes/version.php b/wp-includes/version.php index 2e6b386391..9bf424bfdc 100644 --- a/wp-includes/version.php +++ b/wp-includes/version.php @@ -16,7 +16,7 @@ * * @global string $wp_version */ -$wp_version = '6.9-alpha-60404'; +$wp_version = '6.9-alpha-60405'; /** * Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.