Skip to content

Commit

Permalink
Fix: Add null terminators to character arrays to include final characters
Browse files Browse the repository at this point in the history

Adds '\0' to the end of carray definitions such as digits(),
ensuring that all intended characters are included in the bitset.
Without the terminator, the last character was excluded,
causing methods such as is_digit() to fail for the final character.
  • Loading branch information
alexbarev committed Dec 7, 2024
1 parent 86f53d9 commit 245c85a
Showing 1 changed file with 53 additions and 45 deletions.
98 changes: 53 additions & 45 deletions include/stringzilla/stringzilla.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,12 +119,13 @@ inline void memcpy(void *target, void const *source, std::size_t n) noexcept {
* @brief The concatenation of the `ascii_lowercase` and `ascii_uppercase`. This value is not locale-dependent.
* https://docs.python.org/3/library/string.html#string.ascii_letters
*/
inline carray<52> const &ascii_letters() noexcept {
static carray<52> const all = {
inline carray<53> const &ascii_letters() noexcept {
static carray<53> const all = {
//
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r',
's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r',
's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', //
'\0',
};
return all;
}
Expand All @@ -133,11 +134,12 @@ inline carray<52> const &ascii_letters() noexcept {
* @brief The lowercase letters "abcdefghijklmnopqrstuvwxyz". This value is not locale-dependent.
* https://docs.python.org/3/library/string.html#string.ascii_lowercase
*/
inline carray<26> const &ascii_lowercase() noexcept {
static carray<26> const all = {
inline carray<27> const &ascii_lowercase() noexcept {
static carray<27> const all = {
//
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', //
'\0',
};
return all;
}
Expand All @@ -146,11 +148,12 @@ inline carray<26> const &ascii_lowercase() noexcept {
* @brief The uppercase letters "ABCDEFGHIJKLMNOPQRSTUVWXYZ". This value is not locale-dependent.
* https://docs.python.org/3/library/string.html#string.ascii_uppercase
*/
inline carray<26> const &ascii_uppercase() noexcept {
static carray<26> const all = {
inline carray<27> const &ascii_uppercase() noexcept {
static carray<27> const all = {
//
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', //
'\0',
};
return all;
}
Expand All @@ -160,14 +163,15 @@ inline carray<26> const &ascii_uppercase() noexcept {
* A combination of `digits`, `ascii_letters`, `punctuation`, and `whitespace`.
* https://docs.python.org/3/library/string.html#string.printable
*/
inline carray<100> const &ascii_printables() noexcept {
static carray<100> const all = {
inline carray<101> const &ascii_printables() noexcept {
static carray<101> const all = {
//
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j',
'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'A', 'B', 'C', 'D',
'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
'Y', 'Z', '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.', '/', ':', ';', '<',
'=', '>', '?', '@', '[', '\\', ']', '^', '_', '`', '{', '|', '}', '~', ' ', '\t', '\n', '\r', '\f', '\v',
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j',
'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'A', 'B', 'C', 'D',
'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
'Y', 'Z', '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.', '/', ':', ';', '<',
'=', '>', '?', '@', '[', '\\', ']', '^', '_', '`', '{', '|', '}', '~', ' ', '\t', '\n', '\r', '\f', '\v', //
'\0',
};
return all;
}
Expand All @@ -176,11 +180,12 @@ inline carray<100> const &ascii_printables() noexcept {
* @brief Non-printable ASCII control characters.
* Includes all codes from 0 to 31 and 127.
*/
inline carray<33> const &ascii_controls() noexcept {
static carray<33> const all = {
inline carray<34> const &ascii_controls() noexcept {
static carray<34> const all = {
//
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 127,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 127, //
'\0',
};
return all;
}
Expand All @@ -189,20 +194,21 @@ inline carray<33> const &ascii_controls() noexcept {
* @brief The digits "0123456789".
* https://docs.python.org/3/library/string.html#string.digits
*/
inline carray<10> const &digits() noexcept {
static carray<10> const all = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9'};
inline carray<11> const &digits() noexcept {
static carray<11> const all = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '\0'};
return all;
}

/**
* @brief The hexadecimal digits "0123456789abcdefABCDEF".
* https://docs.python.org/3/library/string.html#string.hexdigits
*/
inline carray<22> const &hexdigits() noexcept {
static carray<22> const all = {
inline carray<23> const &hexdigits() noexcept {
static carray<23> const all = {
//
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', //
'a', 'b', 'c', 'd', 'e', 'f', 'A', 'B', 'C', 'D', 'E', 'F',
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', //
'a', 'b', 'c', 'd', 'e', 'f', 'A', 'B', 'C', 'D', 'E', 'F', //
'\0',
};
return all;
}
Expand All @@ -211,8 +217,8 @@ inline carray<22> const &hexdigits() noexcept {
* @brief The octal digits "01234567".
* https://docs.python.org/3/library/string.html#string.octdigits
*/
inline carray<8> const &octdigits() noexcept {
static carray<8> const all = {'0', '1', '2', '3', '4', '5', '6', '7'};
inline carray<9> const &octdigits() noexcept {
static carray<9> const all = {'0', '1', '2', '3', '4', '5', '6', '7', '\0'};
return all;
}

Expand All @@ -221,11 +227,12 @@ inline carray<8> const &octdigits() noexcept {
* !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~.
* https://docs.python.org/3/library/string.html#string.punctuation
*/
inline carray<32> const &punctuation() noexcept {
static carray<32> const all = {
inline carray<33> const &punctuation() noexcept {
static carray<33> const all = {
//
'!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.', '/', ':',
';', '<', '=', '>', '?', '@', '[', '\\', ']', '^', '_', '`', '{', '|', '}', '~',
'!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.', '/', ':',
';', '<', '=', '>', '?', '@', '[', '\\', ']', '^', '_', '`', '{', '|', '}', '~', //
'\0',
};
return all;
}
Expand All @@ -235,29 +242,30 @@ inline carray<32> const &punctuation() noexcept {
* This includes space, tab, linefeed, return, formfeed, and vertical tab.
* https://docs.python.org/3/library/string.html#string.whitespace
*/
inline carray<6> const &whitespaces() noexcept {
static carray<6> const all = {' ', '\t', '\n', '\r', '\f', '\v'};
inline carray<7> const &whitespaces() noexcept {
static carray<7> const all = {' ', '\t', '\n', '\r', '\f', '\v', '\0'};
return all;
}

/**
* @brief ASCII characters that are considered line delimiters.
* https://docs.python.org/3/library/stdtypes.html#str.splitlines
*/
inline carray<8> const &newlines() noexcept {
static carray<8> const all = {'\n', '\r', '\f', '\v', '\x1C', '\x1D', '\x1E', '\x85'};
inline carray<9> const &newlines() noexcept {
static carray<9> const all = {'\n', '\r', '\f', '\v', '\x1C', '\x1D', '\x1E', '\x85', '\0'};
return all;
}

/**
* @brief ASCII characters forming the BASE64 encoding alphabet.
*/
inline carray<64> const &base64() noexcept {
static carray<64> const all = {
inline carray<65> const &base64() noexcept {
static carray<65> const all = {
//
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
'w', 'x', 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R',
'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/',
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
'w', 'x', 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R',
'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/', //
'\0',
};
return all;
}
Expand Down

0 comments on commit 245c85a

Please sign in to comment.