Skip to content

Commit

Permalink
Add ReazonSpeech Japanese models (#630)
Browse files Browse the repository at this point in the history
  • Loading branch information
csukuangfj authored Aug 1, 2024
1 parent 1a4dce5 commit b48cf09
Show file tree
Hide file tree
Showing 6 changed files with 188 additions and 1 deletion.
1 change: 1 addition & 0 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ def get_version():
.. _Node: https://nodejs.org/en
.. _SenseVoice: https://github.com/FunAudioLLM/SenseVoice
.. _LibriTTS-R: https://www.openslr.org/141/
.. _ReazonSpeech: https://github.com/reazon-research/ReazonSpeech
"""


Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
/Users/fangjun/open-source/sherpa-onnx/sherpa-onnx/csrc/parse-options.cc:Read:375 ./build/bin/sherpa-onnx-offline --tokens=./sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01/tokens.txt --encoder=./sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01/encoder-epoch-99-avg-1.int8.onnx --decoder=./sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01/decoder-epoch-99-avg-1.onnx --joiner=./sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01/joiner-epoch-99-avg-1.int8.onnx --num-threads=1 ./sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01/test_wavs/1.wav

OfflineRecognizerConfig(feat_config=FeatureExtractorConfig(sampling_rate=16000, feature_dim=80, low_freq=20, high_freq=-400, dither=0), model_config=OfflineModelConfig(transducer=OfflineTransducerModelConfig(encoder_filename="./sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01/encoder-epoch-99-avg-1.int8.onnx", decoder_filename="./sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01/decoder-epoch-99-avg-1.onnx", joiner_filename="./sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01/joiner-epoch-99-avg-1.int8.onnx"), paraformer=OfflineParaformerModelConfig(model=""), nemo_ctc=OfflineNemoEncDecCtcModelConfig(model=""), whisper=OfflineWhisperModelConfig(encoder="", decoder="", language="", task="transcribe", tail_paddings=-1), tdnn=OfflineTdnnModelConfig(model=""), zipformer_ctc=OfflineZipformerCtcModelConfig(model=""), wenet_ctc=OfflineWenetCtcModelConfig(model=""), sense_voice=OfflineSenseVoiceModelConfig(model="", language="auto", use_itn=False), telespeech_ctc="", tokens="./sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01/tokens.txt", num_threads=1, debug=False, provider="cpu", model_type="", modeling_unit="cjkchar", bpe_vocab=""), lm_config=OfflineLMConfig(model="", scale=0.5), ctc_fst_decoder_config=OfflineCtcFstDecoderConfig(graph="", max_active=3000), decoding_method="greedy_search", max_active_paths=4, hotwords_file="", hotwords_score=1.5, blank_penalty=0, rule_fsts="", rule_fars="")
Creating recognizer ...
Started
Done!

./sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01/test_wavs/1.wav
{"text": "気象庁は雪や路面の凍結による交通への影響暴風雪や高波に警戒するとともに雪崩や屋根からの落雪にも十分注意するよう呼びかけています", "timestamps": [0.00, 0.48, 0.64, 0.88, 1.24, 1.44, 1.80, 2.00, 2.12, 2.40, 2.56, 2.80, 2.96, 3.04, 3.44, 3.60, 3.88, 4.00, 4.28, 4.40, 4.76, 4.96, 5.20, 5.40, 5.72, 5.92, 6.20, 6.48, 6.64, 6.88, 6.96, 7.08, 7.28, 7.48, 7.64, 8.00, 8.16, 8.36, 8.68, 8.80, 9.04, 9.12, 9.28, 9.64, 9.80, 10.00, 10.16, 10.44, 10.64, 10.92, 11.04, 11.24, 11.36, 11.52, 11.60, 11.88, 11.92, 12.16, 12.28, 12.44, 12.64, 13.16, 13.20], "tokens":["気", "象", "庁", "は", "雪", "や", "路", "面", "の", "凍", "結", "に", "よ", "る", "交", "通", "へ", "の", "影", "響", "暴", "風", "雪", "や", "高", "波", "に", "警", "戒", "す", "る", "と", "と", "も", "に", "雪", "崩", "や", "屋", "根", "か", "ら", "の", "落", "雪", "に", "も", "十", "分", "注", "意", "す", "る", "よ", "う", "呼", "び", "か", "け", "て", "い", "ま", "す"], "words": []}
----
num threads: 1
decoding method: greedy_search
Elapsed seconds: 0.719 s
Real time factor (RTF): 0.719 / 13.433 = 0.054
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
/Users/fangjun/open-source/sherpa-onnx/sherpa-onnx/csrc/parse-options.cc:Read:375 ./build/bin/sherpa-onnx-offline --tokens=./sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01/tokens.txt --encoder=./sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01/encoder-epoch-99-avg-1.onnx --decoder=./sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01/decoder-epoch-99-avg-1.onnx --joiner=./sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01/joiner-epoch-99-avg-1.onnx --num-threads=1 ./sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01/test_wavs/1.wav

OfflineRecognizerConfig(feat_config=FeatureExtractorConfig(sampling_rate=16000, feature_dim=80, low_freq=20, high_freq=-400, dither=0), model_config=OfflineModelConfig(transducer=OfflineTransducerModelConfig(encoder_filename="./sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01/encoder-epoch-99-avg-1.onnx", decoder_filename="./sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01/decoder-epoch-99-avg-1.onnx", joiner_filename="./sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01/joiner-epoch-99-avg-1.onnx"), paraformer=OfflineParaformerModelConfig(model=""), nemo_ctc=OfflineNemoEncDecCtcModelConfig(model=""), whisper=OfflineWhisperModelConfig(encoder="", decoder="", language="", task="transcribe", tail_paddings=-1), tdnn=OfflineTdnnModelConfig(model=""), zipformer_ctc=OfflineZipformerCtcModelConfig(model=""), wenet_ctc=OfflineWenetCtcModelConfig(model=""), sense_voice=OfflineSenseVoiceModelConfig(model="", language="auto", use_itn=False), telespeech_ctc="", tokens="./sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01/tokens.txt", num_threads=1, debug=False, provider="cpu", model_type="", modeling_unit="cjkchar", bpe_vocab=""), lm_config=OfflineLMConfig(model="", scale=0.5), ctc_fst_decoder_config=OfflineCtcFstDecoderConfig(graph="", max_active=3000), decoding_method="greedy_search", max_active_paths=4, hotwords_file="", hotwords_score=1.5, blank_penalty=0, rule_fsts="", rule_fars="")
Creating recognizer ...
Started
Done!

./sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01/test_wavs/1.wav
{"text": "気象庁は雪や路面の凍結による交通への影響暴風雪や高波に警戒するとともに雪崩や屋根からの落雪にも十分注意するよう呼びかけています", "timestamps": [0.00, 0.48, 0.64, 0.88, 1.24, 1.44, 1.80, 2.00, 2.12, 2.40, 2.56, 2.80, 2.96, 3.04, 3.44, 3.60, 3.88, 4.00, 4.28, 4.40, 4.76, 4.96, 5.20, 5.40, 5.72, 5.92, 6.16, 6.48, 6.64, 6.88, 6.96, 7.08, 7.28, 7.48, 7.64, 8.00, 8.16, 8.36, 8.68, 8.80, 9.04, 9.12, 9.28, 9.64, 9.80, 10.00, 10.16, 10.44, 10.64, 10.92, 11.04, 11.24, 11.36, 11.52, 11.64, 11.88, 11.92, 12.16, 12.28, 12.44, 12.64, 13.16, 13.20], "tokens":["気", "象", "庁", "は", "雪", "や", "路", "面", "の", "凍", "結", "に", "よ", "る", "交", "通", "へ", "の", "影", "響", "暴", "風", "雪", "や", "高", "波", "に", "警", "戒", "す", "る", "と", "と", "も", "に", "雪", "崩", "や", "屋", "根", "か", "ら", "の", "落", "雪", "に", "も", "十", "分", "注", "意", "す", "る", "よ", "う", "呼", "び", "か", "け", "て", "い", "ま", "す"], "words": []}
----
num threads: 1
decoding method: greedy_search
Elapsed seconds: 1.101 s
Real time factor (RTF): 1.101 / 13.433 = 0.082
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,164 @@ Zipformer-transducer-based Models
Please refer to :ref:`install_sherpa_onnx` to install `sherpa-onnx`
before you read this section.

sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01 (Japanese, 日语)
------------------------------------------------------------------

This model is from `ReazonSpeech`_ and supports only Japanese.
It is trained on 35k hours of data.

The code for training the model can be found at
`<https://github.com/k2-fsa/icefall/tree/master/egs/reazonspeech/ASR>`_

The paper about the dataset is available at `<https://research.reazon.jp/_static/reazonspeech_nlp2023.pdf>`_

In the following, we describe how to download it and use it with `sherpa-onnx`_.


.. hint::

   The original ONNX model is from

   `<https://huggingface.co/reazon-research/reazonspeech-k2-v2>`_

Download the model
~~~~~~~~~~~~~~~~~~

Please use the following commands to download it.

.. code-block:: bash

   wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01.tar.bz2

   # For Chinese users, you can use the following mirror
   # wget https://hub.nuaa.cf/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01.tar.bz2

   tar xvf sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01.tar.bz2
   rm sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01.tar.bz2

   ls -lh sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01

You should see the following output:

.. code-block:: bash

   -rw-r--r-- 1 fangjun staff 1.2K Aug 1 18:32 README.md
   -rw-r--r-- 1 fangjun staff 2.8M Aug 1 18:32 decoder-epoch-99-avg-1.int8.onnx
   -rw-r--r-- 1 fangjun staff 11M Aug 1 18:32 decoder-epoch-99-avg-1.onnx
   -rw-r--r-- 1 fangjun staff 148M Aug 1 18:32 encoder-epoch-99-avg-1.int8.onnx
   -rw-r--r-- 1 fangjun staff 565M Aug 1 18:32 encoder-epoch-99-avg-1.onnx
   -rw-r--r-- 1 fangjun staff 2.6M Aug 1 18:32 joiner-epoch-99-avg-1.int8.onnx
   -rw-r--r-- 1 fangjun staff 10M Aug 1 18:32 joiner-epoch-99-avg-1.onnx
   drwxr-xr-x 8 fangjun staff 256B Aug 1 18:31 test_wavs
   -rw-r--r-- 1 fangjun staff 45K Aug 1 18:32 tokens.txt

Decode wave files
~~~~~~~~~~~~~~~~~

.. hint::

   Only single-channel wave files with 16-bit encoded samples are supported.
   The sampling rate does not need to be 16 kHz.

fp32
^^^^

The following code shows how to use ``fp32`` models to decode wave files:

.. code-block:: bash

   cd /path/to/sherpa-onnx

   ./build/bin/sherpa-onnx-offline \
     --tokens=./sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01/tokens.txt \
     --encoder=./sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01/encoder-epoch-99-avg-1.onnx \
     --decoder=./sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01/decoder-epoch-99-avg-1.onnx \
     --joiner=./sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01/joiner-epoch-99-avg-1.onnx \
     --num-threads=1 \
     ./sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01/test_wavs/1.wav

.. note::

   Please use ``./build/bin/Release/sherpa-onnx-offline.exe`` for Windows.

.. caution::

   If you use Windows and get encoding issues, please run:

   .. code-block:: bash

      CHCP 65001

   in your command line.

You should see the following output:

.. literalinclude:: ./code-zipformer/sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01.txt

int8
^^^^

The following code shows how to use ``int8`` models to decode wave files:

.. code-block:: bash

   cd /path/to/sherpa-onnx

   ./build/bin/sherpa-onnx-offline \
     --tokens=./sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01/tokens.txt \
     --encoder=./sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01/encoder-epoch-99-avg-1.int8.onnx \
     --decoder=./sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01/decoder-epoch-99-avg-1.onnx \
     --joiner=./sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01/joiner-epoch-99-avg-1.int8.onnx \
     --num-threads=1 \
     ./sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01/test_wavs/1.wav

.. note::

   Please use ``./build/bin/Release/sherpa-onnx-offline.exe`` for Windows.

.. caution::

   If you use Windows and get encoding issues, please run:

   .. code-block:: bash

      CHCP 65001

   in your command line.

You should see the following output:

.. literalinclude:: ./code-zipformer/sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01-int8.txt

Speech recognition from a microphone
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. code-block:: bash

   cd /path/to/sherpa-onnx

   ./build/bin/sherpa-onnx-microphone-offline \
     --tokens=./sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01/tokens.txt \
     --encoder=./sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01/encoder-epoch-99-avg-1.int8.onnx \
     --decoder=./sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01/decoder-epoch-99-avg-1.onnx \
     --joiner=./sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01/joiner-epoch-99-avg-1.int8.onnx

Speech recognition from a microphone with VAD
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. code-block:: bash

   cd /path/to/sherpa-onnx

   wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx

   ./build/bin/sherpa-onnx-vad-microphone-offline-asr \
     --silero-vad-model=./silero_vad.onnx \
     --tokens=./sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01/tokens.txt \
     --encoder=./sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01/encoder-epoch-99-avg-1.int8.onnx \
     --decoder=./sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01/decoder-epoch-99-avg-1.onnx \
     --joiner=./sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01/joiner-epoch-99-avg-1.int8.onnx

sherpa-onnx-zipformer-korean-2024-06-24 (Korean, 韩语)
------------------------------------------------------------

Expand Down
Binary file modified docs/source/onnx/sense-voice/pic/python-websocket/client-1.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion docs/source/onnx/sense-voice/python-api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,7 @@ Please visit `<http://localhost:6006>`_.
- Incorrect/Wrong address: `<http://a.b.c.d:6006>`_
- Incorrect/Wrong address: `<https://a.b.c.d:6006>`_

After starting you browser, you should see the following page:
After starting the browser, you should see the following page:

.. image:: ./pic/python-websocket/client-1.jpg
:align: center
Expand Down

0 comments on commit b48cf09

Please sign in to comment.