Voice + Text
Voice Conversational Chatbot
Build a real-time voice chatbot: speech-to-text transcription, LLM reasoning, and text-to-speech synthesis — all powered through OneInfer's unified API.
OneInfer Chat API · OneInfer Audio API · Whisper · Python · FastAPI
Pipeline overview
Microphone audio → Whisper STT → LLM response → TTS synthesis → Speaker output
Step-by-step guide
1
Transcribe audio with Whisper
python
import openai
# OneInfer exposes an OpenAI-compatible API, so the official openai client
# works unchanged — only base_url (and the key) differ from stock OpenAI.
# NOTE(review): hard-coded placeholder key; in real code load it from an
# environment variable or secret store instead of the source file.
client = openai.OpenAI(
api_key="your-oneinfer-api-key",
base_url="https://api.oneinfer.ai/v1"
)
def transcribe(audio_path: str) -> str:
    """Transcribe an audio file to text via OneInfer's Whisper endpoint.

    Args:
        audio_path: Path to the recorded audio file.

    Returns:
        The transcript as a plain string (``response_format="text"``
        makes the API return raw text rather than a JSON object).
    """
    # Binary mode: the API wants the raw audio bytes, not decoded text.
    with open(audio_path, "rb") as f:
        result = client.audio.transcriptions.create(
            model="whisper-1",
            file=f,
            response_format="text",
        )
    # Fixed: extraction garble had fused the step number into "return result2",
    # which would raise NameError at runtime.
    return result
Get a chat response from the LLM
python
from typing import Generator
def chat_stream(history: list, user_message: str) -> Generator[str, None, None]:
    """Stream an LLM reply to *user_message*, yielding text deltas.

    Mutates ``history`` in place: the user message is appended before the
    request, and the complete assistant reply is appended after the stream
    finishes, so callers can reuse the same list across turns.

    Args:
        history: Running conversation as chat-message dicts (mutated).
        user_message: Latest user utterance to answer.

    Yields:
        Incremental response text chunks as they arrive.
    """
    history.append({"role": "user", "content": user_message})
    stream = client.chat.completions.create(
        model="meta-llama/Llama-3.3-70B-Instruct-Turbo",
        messages=[
            {"role": "system", "content": "You are a helpful voice assistant. Be concise."},
            *history,
        ],
        stream=True,
    )
    full_response = ""
    for chunk in stream:
        # Terminal chunks may carry content=None; coalesce to "".
        delta = chunk.choices[0].delta.content or ""
        full_response += delta
        yield delta
    # Record the assistant turn so the next call keeps full context.
    # Fixed: extraction garble had fused the step number "3" onto this line.
    history.append({"role": "assistant", "content": full_response})
Synthesize speech and play it back
python
import io
import sounddevice as sd
import soundfile as sf
def speak(text: str):
    """Synthesize *text* with the TTS endpoint and play it on the speakers.

    Blocks until playback finishes.
    """
    tts_response = client.audio.speech.create(
        model="tts-1",
        voice="alloy",
        input=text,
    )
    # Decode the returned audio container entirely in memory, then play it
    # back and wait for the audio device to drain.
    buffer = io.BytesIO(tts_response.content)
    waveform, sample_rate = sf.read(buffer)
    sd.play(waveform, sample_rate)
    sd.wait()
# Full voice loop: listen -> transcribe -> think -> speak, forever.
history = []
while True:
    user_text = transcribe("recorded_input.wav")
    print(f"You: {user_text}")
    # Consume the token stream; chat_stream also records both turns in history.
    bot_reply = "".join(chat_stream(history, user_text))
    print(f"Bot: {bot_reply}")
    speak(bot_reply)