Chat & Text
Streaming Responses
Set "stream": true to receive tokens incrementally via Server-Sent Events (SSE). This keeps UIs responsive while long outputs are being generated.
Streaming is available on all chat completion endpoints. The response uses the `text/event-stream` content type.
Python (SSE)
python
import requests, json

BASE_URL = "https://api.oneinfer.ai"

# Exchange the API key for a short-lived bearer token.
# A timeout is required: without one, requests can block indefinitely.
auth = requests.post(
    f"{BASE_URL}/v1/ula/oauth-authentication?api_key=YOUR_API_KEY",
    timeout=30,
)
auth.raise_for_status()  # fail fast on a bad key instead of a confusing KeyError
token = auth.json()["access_token"]

with requests.post(
    f"{BASE_URL}/v1/ula/chat/completions",
    headers={
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
    },
    json={
        "provider": "anthropic",
        "model": "claude-sonnet-4-6",
        "messages": [{"role": "user", "content": "Explain transformer architecture."}],
        "max_tokens": 512,
        "stream": True,
    },
    stream=True,  # tell requests not to buffer the whole response
    timeout=300,  # generous cap for long generations; tune per use case
) as r:
    r.raise_for_status()  # surface HTTP errors before trying to parse SSE
    for line in r.iter_lines():
        # Skip SSE keep-alive blank lines and any non-data fields.
        if not line or not line.startswith(b"data:"):
            continue
        data = line[len(b"data:"):].strip()
        # Many SSE APIs terminate the stream with a "[DONE]" sentinel,
        # which is not valid JSON — stop cleanly if we see it.
        if data == b"[DONE]":
            break
        payload = json.loads(data)
        print(payload.get("delta", ""), end="", flush=True)