stt_push.py stt¶
This example shows push-mode Speech-to-Text. The application owns the audio source, reads a WAV file in small chunks, and feeds each chunk to the recognizer with push.
STT support is a TrulyNatural-only feature. On builds that do not include STT, the sample prints "STT not supported" and exits successfully.
Instructions¶
-
Set up the sample project environment:
cd ~/Sensory/TrulyNaturalSDK/7.8.0-pre.2/sample/python
uv venv
uv sync
-
Run the sample:
uv run src/stt_push.py
On STT-capable builds, the sample prints the final recognition result.
Code¶
Available in this TrulyNatural SDK installation at ~/Sensory/TrulyNaturalSDK/7.8.0-pre.2/sample/python/src/stt_push.py
stt_push.py
"""Push-mode Speech-To-Text for the TrulyNatural SDK Python binding.
Loads the automotive STT model and feeds a WAV file into the
session in 480-byte chunks via ``Session.push``, printing each
``RESULT_EVENT`` as it arrives. This is the streaming counterpart
to ``hello_world.py``'s pull-mode ``set_stream`` + ``run`` loop:
the application owns the audio source and decides when to feed
samples in, which is the right shape for live audio, network
streams, or any other producer that does not look like a file.
Modelled on ``tests/test_snsr.py::test_Session_push_spotter`` and
``::test_Session_run_stt_reset`` from the ``snsr`` binding test
suite.
STT support is a TrulyNatural-only feature. On builds that do not
include it (notably TrulyHandsfree), the script prints a clear
"STT not supported" line and exits 0 so it composes cleanly with
the SDK's acceptance test.
Usage::
uv run src/stt_push.py [--sdk-root PATH]
"""
from __future__ import annotations
import argparse
import sys
from pathlib import Path
import snsr
# File name of the STT model; main() looks for it under <sdk-root>/model/.
MODEL = "stt-enUS-automotive-medium-2.3.15-pnc.snsr"
# File name of the WAV recording; main() looks for it under
# <sdk-root>/data/audio/.
AUDIO = "voice-genie-set-cruise-control.wav"
# Number of bytes requested from the audio stream per read; each chunk read
# is handed to one Session.push call in run_stt_push().
CHUNK_BYTES = 480
# Value written to the session's snsr.CUSTOM_VOCAB setting before any audio
# is pushed.
CUSTOM_VOCAB = "voice genie, set cruise control"
def default_sdk_root() -> Path:
    """Guess the SDK install root from this script's own location.

    Returns the fourth ancestor directory (``parents[3]``) of the
    resolved script path. With the script installed under
    ``<root>/sample/python/src/`` that ancestor is ``<root>``.
    """
    script_path = Path(__file__).resolve()
    # parents[0] is the script's directory; index 3 is three levels above it.
    ancestors = script_path.parents
    return ancestors[3]
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the command-line options of this sample.

    ``argv`` is the argument list to parse; ``None`` lets ``argparse``
    fall back to ``sys.argv[1:]``. The returned namespace carries one
    attribute, ``sdk_root`` (a ``Path``), which defaults to
    ``default_sdk_root()``.
    """
    # ``__doc__`` is None when docstrings are stripped (``python -OO``) and
    # may be empty; fall back to no description rather than crashing on
    # ``None.splitlines()`` or indexing an empty list.
    doc_lines = (__doc__ or "").splitlines()
    description = doc_lines[0] if doc_lines else None

    parser = argparse.ArgumentParser(description=description)
    parser.add_argument(
        "--sdk-root",
        type=Path,
        default=default_sdk_root(),
        help="TrulyNatural SDK install root (default: auto-detect)",
    )
    return parser.parse_args(argv)
def stt_supported() -> bool:
    """Report whether the loaded ``snsr`` build supports STT.

    A throwaway session (created without a model) is opened only to
    read the integer ``STT_SUPPORT`` setting, and is closed again
    before the caller loads the real STT model. Any non-zero value
    counts as "supported".
    """
    with snsr.Session() as probe:
        support_flag = probe.get_int(snsr.STT_SUPPORT)
    return bool(support_flag)
def run_stt_push(model_path: Path, audio_path: Path) -> int:
    """Recognize ``audio_path`` with the model at ``model_path`` in push mode.

    Prints a short banner, then reads the audio in ``CHUNK_BYTES``-sized
    pieces and pushes each piece into a new session. Every
    ``RESULT_EVENT`` is printed as it is delivered. Returns the number
    of result events seen.
    """
    events_seen = 0

    def on_result(session: snsr.Session, _key: bytes) -> None:
        # Registered for RESULT_EVENT: tally the event and echo its text.
        nonlocal events_seen
        events_seen += 1
        print(f" result: {session.get_string(snsr.RES_TEXT)!r}")

    banner = (
        f"snsr {snsr.VERSION}",
        f" model: {model_path}",
        f" audio: {audio_path}",
        f" vocab: {CUSTOM_VOCAB!r}",
        "",
    )
    for line in banner:
        print(line)

    with snsr.Session(str(model_path)) as recognizer:
        recognizer.set_string(snsr.CUSTOM_VOCAB, CUSTOM_VOCAB)
        recognizer.set_handler(snsr.RESULT_EVENT, on_result)
        with snsr.Stream.from_audio_file(str(audio_path)) as wav:
            while True:
                chunk = wav.read(CHUNK_BYTES)
                # An empty read marks the end of the audio data.
                if chunk == b"":
                    break
                recognizer.push(snsr.SOURCE_AUDIO_PCM, chunk)
        # NOTE(review): stop() presumably flushes any pending result once all
        # audio has been pushed -- confirm against the snsr binding docs.
        recognizer.stop()

    print()
    print(f"done: {events_seen} result event(s)")
    return events_seen
def main(argv: list[str] | None = None) -> int:
    """Run the push-mode STT sample and return a process exit code.

    ``argv`` is forwarded to ``parse_args``. Returns 0 when recognition
    ran, and also when the ``snsr`` build has no STT support (a notice
    is printed and the run is skipped). Returns 2 when the model or the
    audio file is missing under the chosen SDK root.
    """
    args = parse_args(argv)
    sdk_root: Path = args.sdk_root.resolve()

    # Lack of STT support is reported but deliberately not treated as an error.
    if not stt_supported():
        print(
            "STT not supported in this TrulyNatural build "
            "(snsr.STT_SUPPORT == 0); skipping."
        )
        return 0

    model_path = sdk_root / "model" / MODEL
    audio_path = sdk_root / "data" / "audio" / AUDIO

    # Check both inputs up front so a wrong --sdk-root gives a clear message.
    for label, path in (("model", model_path), ("audio", audio_path)):
        if not path.is_file():
            print(f"error: {label} not found: {path}", file=sys.stderr)
            print(
                "hint: pass --sdk-root pointing at a TrulyNatural SDK install",
                file=sys.stderr,
            )
            return 2

    run_stt_push(model_path, audio_path)
    return 0
if __name__ == "__main__":
    # Hand main()'s return value to the interpreter as the exit status.
    sys.exit(main())