Research Tools

Open NotebookLM

Build an open-source alternative to Google's NotebookLM: upload documents, ask questions grounded in your sources, and generate podcast-style audio summaries — all with OneInfer.

OneInfer Chat API · OneInfer Embeddings · OneInfer TTS · FAISS · Streamlit

Step-by-step guide

Step 1 — Build the document knowledge base

python
import openai
import faiss
import numpy as np
from pypdf import PdfReader
from dataclasses import dataclass

# OneInfer exposes an OpenAI-compatible API, so the stock OpenAI client
# works unchanged once pointed at the OneInfer base URL.
client = openai.OpenAI(
    api_key="your-oneinfer-api-key",  # placeholder — load the real key from an env var in production
    base_url="https://api.oneinfer.ai/v1"
)

@dataclass
class Chunk:
    """One slice of extracted PDF text plus where it came from."""
    text: str    # chunk text (at most chunk_size characters, see load_and_chunk)
    source: str  # path of the PDF this chunk was extracted from
    page: int    # 1-based page number within that PDF

def load_and_chunk(pdf_path: str, chunk_size: int = 600, overlap: int = 0) -> list[Chunk]:
    """Read a PDF and split each page's text into fixed-size character chunks.

    Args:
        pdf_path: Path to the PDF file to ingest.
        chunk_size: Maximum number of characters per chunk; must be positive.
        overlap: Characters shared between consecutive chunks, which helps
            retrieval when a sentence straddles a chunk boundary. The default
            of 0 preserves the original non-overlapping behavior. Must be
            smaller than chunk_size.

    Returns:
        A list of Chunk records tagged with the source path and 1-based page.

    Raises:
        ValueError: If chunk_size is not positive or overlap is out of range.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be positive")
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

    step = chunk_size - overlap
    reader = PdfReader(pdf_path)
    chunks: list[Chunk] = []
    for page_num, page in enumerate(reader.pages, start=1):
        # extract_text() returns None for image-only pages; treat as empty.
        text = (page.extract_text() or "").strip()
        for start in range(0, len(text), step):
            chunks.append(Chunk(text[start:start + chunk_size], pdf_path, page_num))
    return chunks

def build_index(chunks: list[Chunk]):
    """Embed every chunk and load the vectors into a flat L2 FAISS index.

    Returns a ``(index, vectors)`` pair: the searchable FAISS index and the
    float32 embedding matrix backing it.
    """
    # One batched embeddings call covers all chunk texts at once.
    response = client.embeddings.create(
        model="text-embedding-3-small",
        input=[chunk.text for chunk in chunks],
    )
    matrix = np.asarray(
        [item.embedding for item in response.data], dtype="float32"
    )
    # Exact (brute-force) Euclidean-distance index sized to the embedding dim.
    l2_index = faiss.IndexFlatL2(matrix.shape[1])
    l2_index.add(matrix)
    return l2_index, matrix

# Ingest the paper once at startup: chunk it, embed it, and build the index.
chunks = load_and_chunk("research_paper.pdf")
index, vectors = build_index(chunks)
print(f"Indexed {len(chunks)} chunks")
Step 2 — Answer questions grounded in sources

python
def ask(question: str, top_k: int = 5) -> dict:
    """Answer *question* using the most relevant indexed chunks.

    Embeds the question, retrieves up to ``top_k`` nearest chunks from the
    module-level FAISS ``index``, and asks the chat model to answer strictly
    from that context, citing page numbers.

    Args:
        question: The natural-language question to answer.
        top_k: Maximum number of chunks to retrieve as context.

    Returns:
        A dict with "answer" (the model's text) and "sources" (a list of
        {"source", "page"} dicts for the chunks that were used).
    """
    q_resp = client.embeddings.create(model="text-embedding-3-small", input=[question])
    q_vec = np.array([q_resp.data[0].embedding], dtype="float32")

    # Never ask FAISS for more neighbors than the index holds: it pads the
    # result with -1, and chunks[-1] would then silently return the wrong
    # chunk. Clamp k and drop any negative indices defensively.
    k = min(top_k, len(chunks))
    _, indices = index.search(q_vec, k)
    relevant = [chunks[i] for i in indices[0] if i >= 0]

    context = "\n\n".join(
        f"[Source: {c.source}, Page {c.page}]\n{c.text}"
        for c in relevant
    )

    response = client.chat.completions.create(
        model="meta-llama/Llama-3.3-70B-Instruct-Turbo",
        messages=[
            {"role": "system", "content": (
                "You are a research assistant. Answer questions strictly based on "
                "the provided source material. Always cite the page number."
            )},
            {"role": "user", "content": f"Sources:\n{context}\n\nQuestion: {question}"}
        ]
    )
    return {
        "answer": response.choices[0].message.content,
        "sources": [{"source": c.source, "page": c.page} for c in relevant]
    }

# Example query against the indexed paper; result also carries "sources".
result = ask("What methodology did the authors use?")
print(result["answer"])
Step 3 — Generate a podcast-style audio summary

python
def generate_podcast(chunks: list[Chunk], num_chunks: int = 8) -> bytes:
    """Turn the first chunks of a document into podcast-style MP3 audio.

    The LLM writes a short script from a sample of the document, then the
    TTS model narrates it.

    Args:
        chunks: Document chunks to summarize; only the first ``num_chunks``
            are used, which keeps the prompt within the model's context.
        num_chunks: How many leading chunks to feed into the script prompt.

    Returns:
        The rendered audio as raw bytes (MP3).

    Raises:
        ValueError: If ``chunks`` is empty — otherwise an empty prompt would
            be sent to both the chat and TTS endpoints.
    """
    if not chunks:
        raise ValueError("generate_podcast requires at least one chunk")

    sample_text = " ".join(c.text for c in chunks[:num_chunks])

    script_resp = client.chat.completions.create(
        model="meta-llama/Llama-3.3-70B-Instruct-Turbo",
        messages=[
            {"role": "system", "content": "Write a 2-3 minute podcast script summarizing this research. Be engaging and clear."},
            {"role": "user", "content": sample_text}
        ]
    )
    script = script_resp.choices[0].message.content

    # Narrate the generated script with the hosted TTS model.
    audio_resp = client.audio.speech.create(
        model="tts-1-hd",
        voice="nova",
        input=script
    )
    return audio_resp.content

# Render the summary audio and save it as an MP3 next to this script.
audio = generate_podcast(chunks)
with open("notebook_podcast.mp3", "wb") as f:
    f.write(audio)
print("Podcast saved!")