diff --git a/.gitignore b/.gitignore
index 632a9568..16595a12 100644
--- a/.gitignore
+++ b/.gitignore
@@ -104,6 +104,6 @@ venv.bak/
 .mypy_cache/
 
 # VSCod(e/ium)
-.vscode/
+.vscode*
 vscode/
 *.code-workspace
diff --git a/lingua_franca/format.py b/lingua_franca/format.py
index fcbd34af..02dc8951 100755
--- a/lingua_franca/format.py
+++ b/lingua_franca/format.py
@@ -35,6 +35,7 @@
 _REGISTERED_FUNCTIONS = ("nice_number",
                          "nice_time",
                          "pronounce_number",
+                         "pronounce_digits",
                          "nice_response",
                          "nice_duration")
 
@@ -296,6 +297,34 @@ def pronounce_number(number, lang=None, places=2, short_scale=True,
     """
 
 
+@localized_function()
+def pronounce_digits(number, lang=None, places=2, all_digits=True,
+                     casual=False):
+    """
+    Pronounce a number's digits, either colloquially or in full
+
+    In English, the colloquial way is usually to read two digits at a time,
+    treating each pair as a single number.
+
+    Examples:
+        >>> pronounce_digits(127, all_digits=False)
+        'one twenty seven'
+        >>> pronounce_digits(127, all_digits=True)
+        'one two seven'
+
+    Args:
+        number (int|float): the number to pronounce
+        lang (str, optional): language to pronounce the digits in
+        places (int): maximum number of decimal places to read
+        all_digits (bool): read every digit, rather than two digits at a time
+        casual (bool): in some languages, use a colloquialism for "zero",
+                       such as "oh"
+
+    Returns:
+        (str): the pronounced digits
+    """
+
+
 def nice_date(dt, lang=None, now=None):
     """
     Format a datetime to a pronounceable date
diff --git a/lingua_franca/lang/format_en.py b/lingua_franca/lang/format_en.py
index c2911fe8..00640f80 100644
--- a/lingua_franca/lang/format_en.py
+++ b/lingua_franca/lang/format_en.py
@@ -15,6 +15,8 @@
 # limitations under the License.
 #
 
+from math import modf
+
 from lingua_franca.lang.format_common import convert_to_mixed_fraction
 from lingua_franca.lang.common_data_en import _NUM_STRING_EN, \
     _FRACTION_STRING_EN, _LONG_SCALE_EN, _SHORT_SCALE_EN, _SHORT_ORDINAL_EN, _LONG_ORDINAL_EN
@@ -302,6 +304,90 @@ def _long_scale(n):
     return result
 
 
+def pronounce_digits_en(number, places=2, all_digits=True, casual=False):
+    """
+    Pronounce the digits of a number in English
+
+    Examples:
+        >>> pronounce_digits_en(2806, all_digits=False)
+        'twenty eight zero six'
+        >>> pronounce_digits_en(2806, all_digits=False, casual=True)
+        'twenty eight oh six'
+        >>> pronounce_digits_en(56.92)
+        'five six point nine two'
+
+    Args:
+        number (int|float): the number to pronounce. Negative numbers and
+            floats that str() renders in exponent notation (e.g. 1e-07)
+            are not supported.
+        places (int): maximum number of decimal places to read
+        all_digits (bool): read every digit of the integer part, rather
+            than grouping the digits two or three at a time
+        casual (bool): say "oh" instead of "zero" for a zero inside a
+            multi-digit integer part
+    Returns:
+        (str): the pronounced digits
+    """
+    # TODO make this part of common data?
+    zero_word = "oh" if casual else "zero"
+
+    def _digit(char):
+        # A zero inside a run of digits gets the (possibly casual) zero
+        # word; every other digit is pronounced normally.
+        if char == '0':
+            return zero_word
+        return pronounce_number_en(int(char))
+
+    decimal_part = ""
+    if isinstance(number, float):
+        integer_part, fraction = str(number).split(".")
+        spoken = pronounce_number_en(float("." + fraction), places=places)
+        # Keep only the "point ..." tail. The prefix is sliced off because
+        # str.lstrip() strips a set of characters, not a prefix. When no
+        # decimals are spoken (6.0, or places=0) nothing is appended.
+        if spoken.startswith("zero point"):
+            decimal_part = spoken[len("zero "):]
+    else:
+        integer_part = str(number)
+
+    if all_digits:
+        if len(integer_part) > 1:
+            words = [_digit(char) for char in integer_part]
+        else:
+            words = [pronounce_number_en(int(integer_part))]
+    else:
+        words = []
+        # Consume the digits from the right: a group of two when two or
+        # four digits remain, otherwise a group of three.
+        while len(integer_part) > 1:
+            size = 2 if len(integer_part) in (2, 4) else 3
+            group = integer_part[-size:]
+            integer_part = integer_part[:-size]
+            if '0' in group:
+                # Groups containing a zero are read digit by digit
+                spoken_group = [_digit(char) for char in group]
+            else:
+                spoken_group = pronounce_number_en(int(group)).split(" ")
+            words = spoken_group + words
+        if integer_part:
+            # Only reached when the integer part was a single digit
+            words.insert(0, pronounce_number_en(int(integer_part)))
+
+        # "three hundred and forty five" -> "three forty five"
+        hundred = _SHORT_SCALE_EN[100]
+        spoken_words = []
+        for index, word in enumerate(words):
+            before_and = words[index + 1:index + 2] == ['and']
+            if word == 'and' or (word == hundred and before_and):
+                continue
+            spoken_words.append(word)
+        words = spoken_words
+
+    if decimal_part:
+        words.append(decimal_part)
+    return " ".join(words)
+
+
 def nice_time_en(dt, speech=True, use_24hour=False, use_ampm=False):
     """
     Format a time to a comfortable human format
diff --git a/test/test_format.py b/test/test_format.py
index e6dcc373..a2ba13f4 100644
--- a/test/test_format.py
+++ b/test/test_format.py
@@ -33,6 +33,7 @@
 from lingua_franca.format import nice_year
 from lingua_franca.format import nice_duration
 from lingua_franca.format import pronounce_number
+from lingua_franca.format import pronounce_digits
 from lingua_franca.format import date_time_format
 from lingua_franca.format import join_list
 
@@ -387,6 +388,57 @@ def test_ordinals(self):
 
 # def nice_time(dt, lang="en-us", speech=True, use_24hour=False,
 #               use_ampm=False):
+class TestPronounceDigits(unittest.TestCase):
+    def test_integers(self):
+        self.assertEqual(pronounce_digits(0, all_digits=False), "zero")
+        self.assertEqual(pronounce_digits(1, all_digits=False), "one")
+        self.assertEqual(pronounce_digits(12345, all_digits=False), "twelve three forty five")
+        self.assertEqual(pronounce_digits(7395, all_digits=False), "seventy three ninety five")
+        self.assertEqual(pronounce_digits(286, all_digits=False), "two eighty six")
+        self.assertEqual(pronounce_digits(2806, all_digits=False), "twenty eight zero six")
+        self.assertEqual(pronounce_digits(2806, all_digits=False, casual=True), "twenty eight oh six")
+        self.assertEqual(pronounce_digits(20806, all_digits=False), "two zero eight zero six")
+        self.assertEqual(pronounce_digits(20806, all_digits=False, casual=True), "two oh eight oh six")
+        self.assertEqual(pronounce_digits(311412, all_digits=False), "three eleven four twelve")
+        self.assertEqual(pronounce_digits(354808912, all_digits=False), "three fifty four eight zero eight nine twelve")
+        self.assertEqual(pronounce_digits(238513096, all_digits=False), "two thirty eight five thirteen zero nine six")
+        self.assertEqual(pronounce_digits(238513696, all_digits=False), "two thirty eight five thirteen six ninety six")
+
+    def test_integers_all_digits(self):
+        self.assertEqual(pronounce_digits(0, all_digits=True), "zero")
+        self.assertEqual(pronounce_digits(1, all_digits=True), "one")
+        self.assertEqual(pronounce_digits(12345, all_digits=True), "one two three four five")
+        self.assertEqual(pronounce_digits(7395, all_digits=True), "seven three nine five")
+
+    def test_floats(self):
+        self.assertEqual(pronounce_digits(0.1, all_digits=False), "zero point one")
+        self.assertEqual(pronounce_digits(0.48, all_digits=False), "zero point four eight")
+        self.assertEqual(pronounce_digits(6.40, all_digits=False), "six point four")
+        self.assertEqual(pronounce_digits(56.92, all_digits=False), "fifty six point nine two")
+
+    def test_floats_all_digits(self):
+        self.assertEqual(pronounce_digits(0.7, all_digits=True), "zero point seven")
+        self.assertEqual(pronounce_digits(6.04, all_digits=True), "six point zero four")
+        self.assertEqual(pronounce_digits(6.40, all_digits=True), "six point four")
+        self.assertEqual(pronounce_digits(56.92, all_digits=True), "five six point nine two")
+
+    def test_decimal_places(self):
+        self.assertEqual(pronounce_digits(34.6912, all_digits=False), "thirty four point six nine")
+        self.assertEqual(pronounce_digits(34.6912, all_digits=False, places=3), "thirty four point six nine one")
+        self.assertEqual(pronounce_digits(34.6912, all_digits=False, places=4), "thirty four point six nine one two")
+        self.assertEqual(pronounce_digits(34.6912, all_digits=False, places=5), "thirty four point six nine one two")
+        self.assertEqual(pronounce_digits(34.6912, all_digits=True, places=4), "three four point six nine one two")
+
+    def test_casual_all_digits(self):
+        self.assertEqual(pronounce_digits(2806, all_digits=True, casual=True),
+                         "two eight oh six")
+        self.assertEqual(pronounce_digits(0, all_digits=True, casual=True),
+                         "zero")
+
+    def test_whole_valued_floats(self):
+        self.assertEqual(pronounce_digits(6.0, all_digits=False), "six")
+        self.assertEqual(pronounce_digits(6.0, all_digits=True), "six")
+
 
 class TestNiceDateFormat(unittest.TestCase):
     @classmethod