Skip to content

Commit

Permalink
Add tests for humongous encodings
Browse files Browse the repository at this point in the history
  • Loading branch information
Lőrinc committed Feb 12, 2024
1 parent 1b9faf2 commit 4bf478b
Showing 1 changed file with 13 additions and 0 deletions.
13 changes: 13 additions & 0 deletions tests/test_encoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,19 @@
from .test_helpers import ENCODING_FACTORIES, MAX_EXAMPLES


@pytest.mark.parametrize("make_enc", ENCODING_FACTORIES)
def test_extremely_big_encoding(make_enc: Callable[[], tiktoken.Encoding]):
    """Round-trip very long, highly repetitive inputs through encode/decode.

    For each sample string, a one-million-fold repetition of it (and the same
    text with a single leading space) must decode back to exactly the
    original text after being encoded.

    Args:
        make_enc: Zero-argument factory returning the encoding under test.
    """
    enc = make_enc()
    for c in ["^", "0", "a", " ", "\n", "'s"]:
        print(f"Validating `{c}`")

        # Build the input from the current sample; a hard-coded "^" here
        # would leave every other sample in the list untested.
        big_value = c * 1_000_000
        assert big_value == enc.decode(enc.encode(big_value))

        # Same input again, this time prefixed with a single space.
        big_value = " " + big_value
        assert big_value == enc.decode(enc.encode(big_value))


def test_simple():
enc = tiktoken.get_encoding("gpt2")
assert enc.encode("hello world") == [31373, 995]
Expand Down

0 comments on commit 4bf478b

Please sign in to comment.