## Integration Patterns

### Batch Processing
Process large lists of inputs concurrently with Python's `ThreadPoolExecutor`: a bounded pool of worker threads issues requests in parallel, raising throughput without flooding the API.
```python
import requests
from concurrent.futures import ThreadPoolExecutor, as_completed

BASE_URL = "https://api.oneinfer.ai"

# Exchange the API key for a bearer token, then reuse it for every request.
token = requests.post(
    f"{BASE_URL}/v1/ula/oauth-authentication?api_key=YOUR_API_KEY"
).json()["access_token"]
headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}

inputs = [
    "Summarise the impact of transformer models on NLP.",
    "Explain the difference between fine-tuning and RAG.",
    "What are the main AI inference optimisation techniques?",
    "How do mixture-of-experts models work?",
]

def complete(prompt: str) -> str:
    """Send one chat completion request and return the generated text."""
    resp = requests.post(
        f"{BASE_URL}/v1/ula/chat/completions",
        headers=headers,
        json={
            "provider": "openai",
            "model": "gpt-4o-mini",
            "messages": [{"role": "user", "content": prompt}],
            "max_tokens": 256,
            "temperature": 0.5,
        },
    )
    resp.raise_for_status()  # surface HTTP errors instead of a KeyError below
    return resp.json()["data"]["text"]

# Submit all prompts to a bounded pool of worker threads and print each
# result as it completes, not in submission order.
with ThreadPoolExecutor(max_workers=4) as executor:
    futures = {executor.submit(complete, p): p for p in inputs}
    for future in as_completed(futures):
        prompt = futures[future]
        print(f"\nPrompt: {prompt[:60]}...")
        print(f"Response: {future.result()[:120]}...")
```