Skip to content

live_audio.py

This example captures live audio from the host's default microphone with `Stream.from_audio_device` and runs a wake-word spotter on it for a fixed duration.

It needs a real default capture device. The acceptance test only smoke-tests --help unless SNSR_RUN_LIVE_AUDIO=1 is set.

Instructions

  1. Set up the sample project environment:

    cd ~/Sensory/TrulyNaturalSDK/7.8.0-pre.2/sample/python
    uv venv
    uv sync
    
  2. Run the sample and say "voice genie" during the capture window:

    uv run src/live_audio.py --duration 10
    

    Increase --duration if you need more time before speaking.

Code

Available in this TrulyNatural SDK installation at ~/Sensory/TrulyNaturalSDK/7.8.0-pre.2/sample/python/src/live_audio.py

live_audio.py

"""Live microphone capture for the TrulyNatural SDK Python binding.

Opens the host's default audio input device via
``Stream.from_audio_device``, wires it to a phrase-spotter
session, and prints results as they arrive in real time. Capture
runs for ``--duration`` seconds (default 10).

This sample exercises the SDK's host-audio backend (ALSA on
Linux, Audio Queue Services on macOS, the Windows Multimedia
Extensions wave API on Windows). It needs a real default capture
device and is **not** part of the default acceptance test sweep
because CI runners typically have no microphone; the acceptance
test only verifies that the script imports and parses arguments
cleanly. To run it for real, plug in a microphone and execute::

    uv run src/live_audio.py [--sdk-root PATH] [--duration SECS]

then say "voice genie" within the capture window. The opt-in env
toggle ``SNSR_RUN_LIVE_AUDIO=1`` flips the acceptance test from
help-only to a full live-audio run.
"""

from __future__ import annotations

import argparse
import sys
from pathlib import Path

import snsr


# File name of the wake-word model; main() looks for it under <sdk-root>/model/.
MODEL = "spot-voicegenie-enUS-6.5.1-m.snsr"
# Capture length in seconds used when --duration is not given.
DEFAULT_DURATION_S = 10.0
SAMPLES_PER_SECOND = 16_000  # snsr default capture format
BYTES_PER_SAMPLE = 2  # 16-bit LPCM
# Bytes requested per microphone read: 240 samples at BYTES_PER_SAMPLE, i.e.
# 15 ms at SAMPLES_PER_SECOND, assuming single-channel audio (TODO confirm).
CHUNK_BYTES = 480


def default_sdk_root() -> Path:
    """Return the directory three levels above this script's own folder.

    The script path is resolved to an absolute path first. With the sample
    installed at ``<root>/sample/python/src/live_audio.py`` the result is
    ``<root>``.
    """
    script_path = Path(__file__).resolve()
    ancestors = script_path.parents
    # ancestors[0] is the script's directory (src); index 3 steps up past
    # python/ and sample/ to the install root.
    return ancestors[3]


def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the sample's command-line options.

    Args:
        argv: Argument strings to parse. ``None`` makes argparse read
            ``sys.argv[1:]``.

    Returns:
        Namespace with ``sdk_root`` (a ``Path``) and ``duration`` (a
        ``float`` number of seconds).
    """
    # ``python -OO`` strips docstrings and leaves ``__doc__`` as None, and an
    # empty docstring has no first line. Fall back to no description in both
    # cases instead of crashing before any argument is parsed.
    doc_lines = (__doc__ or "").splitlines()
    summary = doc_lines[0] if doc_lines else None

    parser = argparse.ArgumentParser(description=summary)
    parser.add_argument(
        "--sdk-root",
        type=Path,
        default=default_sdk_root(),
        help="TrulyNatural SDK install root (default: auto-detect)",
    )
    parser.add_argument(
        "--duration",
        type=float,
        default=DEFAULT_DURATION_S,
        help=f"capture duration in seconds (default: {DEFAULT_DURATION_S})",
    )
    return parser.parse_args(argv)


def run_live_audio(model_path: Path, duration_s: float) -> int:
    """Spot phrases in live microphone audio for about ``duration_s`` seconds.

    Opens a session on the model at ``model_path``, reads PCM from the
    default audio device in ``CHUNK_BYTES`` requests and pushes every chunk
    into the session. Capture ends once the byte budget derived from
    ``duration_s`` is reached or a read returns nothing. Each result event
    is printed as it fires, followed by a summary line.

    Args:
        model_path: Path of the spotter model file to load.
        duration_s: Requested capture length in seconds.

    Returns:
        The number of result events handled.
    """
    hits = 0

    def on_result(s: snsr.Session, _key: bytes) -> None:
        # Result-event handler: tally the hit, then report its text and score.
        nonlocal hits
        hits += 1
        phrase = s.get_string(snsr.RES_TEXT)
        confidence = s.get_double(snsr.RES_SCORE)
        print(f"  spotted {phrase!r} (score {confidence:.4f})")

    print(f"snsr {snsr.VERSION}")
    print(f"  model: {model_path}")
    print(f"  capturing for {duration_s:.1f}s; say 'voice genie'...")
    print()

    # Whole samples only: truncate to a sample count before scaling to bytes.
    byte_budget = int(duration_s * SAMPLES_PER_SECOND) * BYTES_PER_SAMPLE
    bytes_seen = 0

    with snsr.Session(str(model_path)) as session:
        session.require(snsr.TASK_TYPE, snsr.PHRASESPOT)
        session.set_handler(snsr.RESULT_EVENT, on_result)
        with snsr.Stream.from_audio_device() as microphone:
            while bytes_seen < byte_budget:
                pcm = microphone.read(CHUNK_BYTES)
                if pcm:
                    session.push(snsr.SOURCE_AUDIO_PCM, pcm)
                    bytes_seen += len(pcm)
                else:
                    # A falsy read ends capture before the budget is used up.
                    break
        # The device is closed at this point; the session is still open.
        session.stop()

    print()
    bytes_per_second = SAMPLES_PER_SECOND * BYTES_PER_SAMPLE
    seconds_heard = bytes_seen / bytes_per_second
    print(f"done: {hits} result event(s) over {seconds_heard:.1f}s of audio")
    return hits


def main(argv: list[str] | None = None) -> int:
    """Validate the command line, then run the live-audio spotter.

    Args:
        argv: Argument strings forwarded to ``parse_args``; ``None`` means
            the process command line.

    Returns:
        ``0`` after a completed capture, ``2`` when the model file is missing
        or ``--duration`` is not a positive, finite number.
    """
    args = parse_args(argv)
    sdk_root: Path = args.sdk_root.resolve()
    model_path = sdk_root / "model" / MODEL

    if not model_path.is_file():
        print(f"error: model not found: {model_path}", file=sys.stderr)
        print(
            "hint: pass --sdk-root pointing at a TrulyNatural SDK install",
            file=sys.stderr,
        )
        return 2

    duration = args.duration
    if duration <= 0:
        print(f"error: --duration must be positive, got {args.duration}", file=sys.stderr)
        return 2

    # NaN and +inf are accepted by ``float()`` and pass the ``<= 0`` test, but
    # run_live_audio converts the duration to an integer sample count, which
    # raises for both. ``not duration < inf`` is true for exactly those two.
    if not duration < float("inf"):
        print(f"error: --duration must be finite, got {duration}", file=sys.stderr)
        return 2

    run_live_audio(model_path, duration)
    return 0


# Script entry point: hand main()'s return value to the interpreter as the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())