From 73caf0adba145e077a4123d96e9f3020d7d088e4 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Wed, 1 Nov 2023 21:55:26 +0100 Subject: [PATCH 1/2] optimize strcspn --- ext/standard/string.c | 29 +-- ext/standard/tests/strings/bug39032.phpt | 2 +- .../tests/strings/strcspn_variation10.phpt | 28 +-- .../tests/strings/strcspn_variation11.phpt | 120 +++++----- .../tests/strings/strcspn_variation12.phpt | 212 +++++++++--------- .../tests/strings/strcspn_variation6.phpt | 8 +- 6 files changed, 196 insertions(+), 203 deletions(-) diff --git a/ext/standard/string.c b/ext/standard/string.c index 8beedfe818f55..ae8deace66da3 100644 --- a/ext/standard/string.c +++ b/ext/standard/string.c @@ -1596,8 +1596,7 @@ PHPAPI char *php_stristr(char *s, char *t, size_t s_len, size_t t_len) } /* }}} */ -/* {{{ php_strspn */ -PHPAPI size_t php_strspn(const char *haystack, const char *characters, const char *haystack_end, const char *characters_end) +static size_t php_strspn_strcspn_common(const char *haystack, const char *characters, const char *haystack_end, const char *characters_end, bool must_match) { /* Fast path for short strings. * The table lookup cannot be faster in this case because we not only have to compare, but also build the table. @@ -1605,7 +1604,7 @@ PHPAPI size_t php_strspn(const char *haystack, const char *characters, const cha * Empirically tested that the table lookup approach is only beneficial if characters is longer than 1 character. */ if (characters_end - characters == 1) { const char *ptr = haystack; - while (ptr < haystack_end && *ptr == *characters) { + while (ptr < haystack_end && (*ptr == *characters) == must_match) { ptr++; } return ptr - haystack; @@ -1626,30 +1625,24 @@ PHPAPI size_t php_strspn(const char *haystack, const char *characters, const cha } const char *ptr = haystack; - while (ptr < haystack_end && table[(unsigned char) *ptr]) { + while (ptr < haystack_end && table[(unsigned char) *ptr] == must_match) { ptr++; } return ptr - haystack; } + +/* {{{ php_strspn */ +PHPAPI size_t php_strspn(const char *haystack, const char *characters, const char *haystack_end, const char *characters_end) +{ + return php_strspn_strcspn_common(haystack, characters, haystack_end, characters_end, true); +} /* }}} */ /* {{{ php_strcspn */ -PHPAPI size_t php_strcspn(const char *s1, const char *s2, const char *s1_end, const char *s2_end) +PHPAPI size_t php_strcspn(const char *haystack, const char *characters, const char *haystack_end, const char *characters_end) { - const char *p, *spanp; - char c = *s1; - - for (p = s1;;) { - spanp = s2; - do { - if (*spanp == c || p == s1_end) { - return p - s1; - } - } while (spanp++ < (s2_end - 1)); - c = *++p; - } - /* NOTREACHED */ + return php_strspn_strcspn_common(haystack, characters, haystack_end, characters_end, false); } /* }}} */ diff --git a/ext/standard/tests/strings/bug39032.phpt b/ext/standard/tests/strings/bug39032.phpt index f01df3ff4c603..ea76528072cbb 100644 --- a/ext/standard/tests/strings/bug39032.phpt +++ b/ext/standard/tests/strings/bug39032.phpt @@ -12,7 +12,7 @@ echo "Done\n"; ?> --EXPECT-- int(1) -int(0) +int(1) int(1) int(1) Done diff --git a/ext/standard/tests/strings/strcspn_variation10.phpt b/ext/standard/tests/strings/strcspn_variation10.phpt index fd69a5a5768d5..253b6924cfbdf 100644 --- a/ext/standard/tests/strings/strcspn_variation10.phpt +++ b/ext/standard/tests/strings/strcspn_variation10.phpt @@ -156,8 +156,8 @@ int(21) int(16) -- Itearation 9 -- -int(5) -int(5) +int(12) +int(12) int(2) int(2) int(12) @@ -180,8 +180,8 @@ int(16) int(5) -- Itearation 11 -- -int(0) -int(0) +int(2) +int(2) int(2) int(2) int(2) @@ -192,8 +192,8 @@ int(2) int(2) -- Itearation 12 -- -int(0) -int(0) +int(13) +int(13) int(3) int(3) int(13) @@ -204,8 +204,8 @@ int(13) int(13) -- Itearation 13 -- -int(0) -int(0) +int(14) +int(14) int(3) int(3) int(14) @@ -216,8 +216,8 @@ int(14) int(6) -- Itearation 14 -- -int(5) -int(5) +int(11) +int(11) int(2) int(2) int(11) @@ -228,8 +228,8 @@ int(11) int(11) -- Itearation 15 -- -int(5) -int(5) +int(11) +int(11) int(2) int(2) int(11) @@ -240,8 +240,8 @@ int(11) int(11) -- Itearation 16 -- -int(5) -int(5) +int(14) +int(14) int(2) int(2) int(14) diff --git a/ext/standard/tests/strings/strcspn_variation11.phpt b/ext/standard/tests/strings/strcspn_variation11.phpt index 329e06fbeed0a..5051702912883 100644 --- a/ext/standard/tests/strings/strcspn_variation11.phpt +++ b/ext/standard/tests/strings/strcspn_variation11.phpt @@ -650,20 +650,20 @@ int(0) int(16) -- Iteration 9 -- -int(5) -int(4) -int(3) +int(12) +int(11) +int(10) int(1) int(2) int(0) -int(5) -int(5) -int(4) -int(3) +int(12) +int(12) +int(11) +int(10) int(1) int(2) int(0) -int(5) +int(12) int(2) int(1) int(0) @@ -794,20 +794,20 @@ int(0) int(5) -- Iteration 11 -- +int(2) +int(1) int(0) +int(1) +int(2) int(0) +int(2) +int(2) +int(1) int(0) +int(1) +int(2) int(0) -int(0) -int(0) -int(0) -int(0) -int(0) -int(0) -int(0) -int(0) -int(0) -int(0) +int(2) int(2) int(1) int(0) @@ -866,20 +866,20 @@ int(0) int(2) -- Iteration 12 -- -int(0) -int(5) -int(4) -int(0) +int(13) +int(12) +int(11) int(1) +int(2) int(0) -int(0) -int(0) -int(5) -int(4) -int(0) +int(13) +int(13) +int(12) +int(11) int(1) +int(2) int(0) -int(0) +int(13) int(3) int(2) int(1) @@ -938,20 +938,20 @@ int(0) int(13) -- Iteration 13 -- -int(0) +int(14) +int(13) int(12) -int(11) -int(0) int(1) +int(2) int(0) -int(0) -int(0) +int(14) +int(14) +int(13) int(12) -int(11) -int(0) int(1) +int(2) int(0) -int(0) +int(14) int(3) int(2) int(1) @@ -1010,20 +1010,20 @@ int(0) int(6) -- Iteration 14 -- -int(5) -int(4) -int(3) +int(11) +int(10) +int(9) int(1) int(2) int(0) -int(5) -int(5) -int(4) -int(3) +int(11) +int(11) +int(10) +int(9) int(1) int(2) int(0) -int(5) +int(11) int(2) int(1) int(0) @@ -1082,20 +1082,20 @@ int(0) int(11) -- Iteration 15 -- -int(5) -int(4) -int(3) +int(11) +int(10) +int(9) int(1) int(2) int(0) -int(5) -int(5) -int(4) -int(3) +int(11) +int(11) +int(10) +int(9) int(1) int(2) int(0) -int(5) +int(11) int(2) int(1) int(0) @@ -1154,20 +1154,20 @@ int(0) int(11) -- Iteration 16 -- -int(5) -int(4) -int(3) +int(14) +int(13) +int(12) int(1) int(2) int(0) -int(5) -int(5) -int(4) -int(3) +int(14) +int(14) +int(13) +int(12) int(1) int(2) int(0) -int(5) +int(14) int(2) int(1) int(0) diff --git a/ext/standard/tests/strings/strcspn_variation12.phpt b/ext/standard/tests/strings/strcspn_variation12.phpt index ea98840559b7e..40b1c93d66fba 100644 --- a/ext/standard/tests/strings/strcspn_variation12.phpt +++ b/ext/standard/tests/strings/strcspn_variation12.phpt @@ -1387,20 +1387,20 @@ int(0) int(0) int(1) int(2) -int(5) -int(5) +int(11) +int(12) int(0) int(0) int(1) int(2) -int(4) -int(4) +int(10) +int(11) int(0) int(0) int(1) int(2) -int(3) -int(3) +int(9) +int(10) int(0) int(0) int(1) @@ -1417,26 +1417,26 @@ int(0) int(0) int(1) int(2) -int(5) -int(5) +int(11) +int(12) int(0) int(0) int(1) int(2) -int(5) -int(5) +int(11) +int(12) int(0) int(0) int(1) int(2) -int(4) -int(4) +int(10) +int(11) int(0) int(0) int(1) int(2) -int(3) -int(3) +int(9) +int(10) int(0) int(0) int(1) @@ -1453,8 +1453,8 @@ int(0) int(0) int(1) int(2) -int(5) -int(5) +int(11) +int(12) int(0) int(0) int(1) @@ -1603,9 +1603,16 @@ int(0) -- Iteration 8 -- int(0) +int(1) +int(2) +int(1) +int(2) int(0) int(0) +int(1) +int(1) int(0) +int(1) int(0) int(0) int(0) @@ -1614,7 +1621,10 @@ int(0) int(0) int(0) int(0) +int(1) +int(1) int(0) +int(1) int(0) int(0) int(0) @@ -1623,11 +1633,22 @@ int(0) int(0) int(0) int(0) +int(1) +int(2) +int(1) +int(2) int(0) int(0) +int(1) +int(2) +int(1) +int(2) int(0) int(0) +int(1) +int(1) int(0) +int(1) int(0) int(0) int(0) @@ -1636,7 +1657,10 @@ int(0) int(0) int(0) int(0) +int(1) +int(1) int(0) +int(1) int(0) int(0) int(0) @@ -1645,34 +1669,10 @@ int(0) int(0) int(0) int(0) -int(0) -int(0) -int(0) -int(0) -int(0) -int(0) -int(0) -int(0) -int(0) -int(0) -int(0) -int(0) -int(0) -int(0) -int(0) -int(0) -int(0) -int(0) -int(0) -int(0) -int(0) -int(0) -int(0) -int(0) -int(0) -int(0) -int(0) -int(0) +int(1) +int(2) +int(1) +int(2) int(0) int(0) int(1) @@ -1821,40 +1821,28 @@ int(0) -- Iteration 9 -- int(0) -int(0) -int(0) -int(0) -int(0) -int(0) -int(0) int(1) int(2) -int(5) -int(5) +int(12) +int(13) int(0) int(0) int(1) int(2) -int(4) -int(4) -int(0) -int(0) -int(0) -int(0) -int(0) -int(0) -int(0) -int(0) -int(0) -int(0) -int(0) -int(0) -int(0) +int(11) +int(12) int(0) int(0) +int(1) +int(2) +int(10) +int(11) int(0) int(0) +int(1) +int(1) int(0) +int(1) int(0) int(0) int(0) @@ -1865,24 +1853,32 @@ int(0) int(0) int(1) int(2) -int(5) -int(5) +int(12) +int(13) int(0) int(0) int(1) int(2) -int(4) -int(4) -int(0) -int(0) -int(0) +int(12) +int(13) int(0) int(0) +int(1) +int(2) +int(11) +int(12) int(0) int(0) +int(1) +int(2) +int(10) +int(11) int(0) int(0) +int(1) +int(1) int(0) +int(1) int(0) int(0) int(0) @@ -1891,6 +1887,10 @@ int(0) int(0) int(0) int(0) +int(1) +int(2) +int(12) +int(13) int(0) int(0) int(1) @@ -2041,20 +2041,20 @@ int(0) int(0) int(1) int(2) -int(5) -int(5) +int(10) +int(11) int(0) int(0) int(1) int(2) -int(4) -int(4) +int(9) +int(10) int(0) int(0) int(1) int(2) -int(3) -int(3) +int(8) +int(9) int(0) int(0) int(1) @@ -2071,26 +2071,26 @@ int(0) int(0) int(1) int(2) -int(5) -int(5) +int(10) +int(11) int(0) int(0) int(1) int(2) -int(5) -int(5) +int(10) +int(11) int(0) int(0) int(1) int(2) -int(4) -int(4) +int(9) +int(10) int(0) int(0) int(1) int(2) -int(3) -int(3) +int(8) +int(9) int(0) int(0) int(1) @@ -2107,8 +2107,8 @@ int(0) int(0) int(1) int(2) -int(5) -int(5) +int(10) +int(11) int(0) int(0) int(1) @@ -2259,20 +2259,20 @@ int(0) int(0) int(1) int(2) -int(5) -int(5) +int(12) +int(13) int(0) int(0) int(1) int(2) -int(4) -int(4) +int(11) +int(12) int(0) int(0) int(1) int(2) -int(3) -int(3) +int(10) +int(11) int(0) int(0) int(1) @@ -2289,26 +2289,26 @@ int(0) int(0) int(1) int(2) -int(5) -int(5) +int(12) +int(13) int(0) int(0) int(1) int(2) -int(5) -int(5) +int(12) +int(13) int(0) int(0) int(1) int(2) -int(4) -int(4) +int(11) +int(12) int(0) int(0) int(1) int(2) -int(3) -int(3) +int(10) +int(11) int(0) int(0) int(1) @@ -2325,8 +2325,8 @@ int(0) int(0) int(1) int(2) -int(5) -int(5) +int(12) +int(13) int(0) int(0) int(1) diff --git a/ext/standard/tests/strings/strcspn_variation6.phpt b/ext/standard/tests/strings/strcspn_variation6.phpt index 7ac6f354a51b7..1e1aa10e59d3f 100644 --- a/ext/standard/tests/strings/strcspn_variation6.phpt +++ b/ext/standard/tests/strings/strcspn_variation6.phpt @@ -145,8 +145,8 @@ int(26) int(26) -- Iteration 6 -- -int(5) -int(5) +int(25) +int(25) int(2) int(2) int(25) @@ -157,8 +157,8 @@ int(25) int(25) -- Iteration 7 -- -int(5) -int(5) +int(27) +int(27) int(2) int(2) int(27) From f6351ce9f6f46f8f9a340f8b6b9143696945702d Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Thu, 2 Nov 2023 20:44:55 +0100 Subject: [PATCH 2/2] [ci skip] UPGRADING --- UPGRADING | 2 ++ 1 file changed, 2 insertions(+) diff --git a/UPGRADING b/UPGRADING index a6b0d84940ff2..834f0f03911e1 100644 --- a/UPGRADING +++ b/UPGRADING @@ -48,6 +48,8 @@ PHP 8.4 UPGRADE NOTES . round() now validates the value of the $mode parameter and throws a ValueError for invalid modes. Previously invalid modes would have been interpreted as PHP_ROUND_HALF_UP. + . strcspn() with empty $characters now returns the length of the string instead + of incorrectly stopping at the first NUL character. See GH-12592. - XML: . The xml_set_*_handler() functions now declare and check for an effective