Skip to content

Commit

Permalink
Defines valid sentence-endings more strictly
Browse files Browse the repository at this point in the history
  • Loading branch information
Natalie-T-E committed Dec 17, 2024
1 parent 1b3e16b commit 2e91271
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 1 deletion.
2 changes: 1 addition & 1 deletion gdex/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ def _de_is_misparsed(sent: Span) -> bool:
return True

last_token = tokens[-1]
- if last_token.pos_ != "PUNCT":
+ if last_token.text not in {".", "?", "!"}:
return True

return False
Expand Down
2 changes: 2 additions & 0 deletions tests/test_gdex.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ def test_misparsed():
assert_knockout("Ein Satz ohne Satzzeichen")
assert_knockout("ein Satz, der mit Kleinbuchstaben beginnt.")
assert_knockout(": Ein Satz mit Interpunktion am Anfang.")
+ assert_knockout("Ein Satz, der nach einem Komma geteilt wurde,")
+ assert_knockout("Der nächste Satz gehört inhaltlich eng zu diesem:")


def test_finite_verb_and_subject():
Expand Down

0 comments on commit 2e91271

Please sign in to comment.