Audio + Summarization

Book Audio Summary

Automatically summarize any book or long document and convert it into a podcast-ready audio file using OneInfer's LLM and TTS APIs.

OneInfer Chat API · OneInfer TTS API · Python · pypdf

Step-by-step guide

Extract text from a book PDF

python

from pypdf import PdfReader

def extract_text(pdf_path: str, max_chars: int = 80000) -> str:
    """Return up to *max_chars* characters of text extracted from a PDF.

    Pages are read in order, and reading stops as soon as enough text has
    been collected, so the remaining pages are never processed.

    Args:
        pdf_path: Path to the PDF file to read.
        max_chars: Upper bound on the length of the returned string.
            Non-positive values yield an empty string.

    Returns:
        The concatenated page text, truncated to *max_chars* characters.
    """
    if max_chars <= 0:
        return ""

    reader = PdfReader(pdf_path)
    parts = []
    collected = 0
    for page in reader.pages:
        # Guard against a falsy result (e.g. None) from extract_text()
        page_text = page.extract_text() or ""
        parts.append(page_text)
        collected += len(page_text)
        if collected >= max_chars:
            break
    # Join once at the end instead of growing a string page by page
    return "".join(parts)[:max_chars]

# Extract the book text (capped by extract_text's default max_chars) and report its size
book_text = extract_text("atomic_habits.pdf")
print(f"Extracted {len(book_text)} characters")

Summarize with fixed-size chunking

python

import openai

# OpenAI SDK client pointed at the OneInfer endpoint via base_url.
# Replace the placeholder with your own key; avoid committing a real key to source control.
client = openai.OpenAI(
    api_key="your-oneinfer-api-key",
    base_url="https://api.oneinfer.ai/v1"
)

def summarize_chunk(chunk: str, chunk_num: int) -> str:
    """Summarize one chunk of book text in a podcast narration style.

    Args:
        chunk: Raw text of the section to summarize.
        chunk_num: Position of the chunk as supplied by the caller. It is
            not used in the request; it is kept so existing calls keep working.

    Returns:
        The model's summary, or an empty string if the response carried
        no text content.
    """
    response = client.chat.completions.create(
        model="meta-llama/Llama-3.3-70B-Instruct-Turbo",
        messages=[
            {"role": "system", "content": (
                "You are a skilled book summarizer. Produce clear, engaging summaries "
                "in a podcast narration style. Use 2-3 paragraphs per section."
            )},
            {"role": "user", "content": f"Summarize this section of the book:\n\n{chunk}"}
        ],
        max_tokens=600
    )
    # message.content is optional in the SDK; normalise None to "" so the
    # declared return type holds and callers can safely join the summaries
    return response.choices[0].message.content or ""

# Cut the text into fixed-size 5000-character pieces, then summarize
# the first 10 pieces only (any remaining pieces are not summarized)
chunk_size = 5000
chunks = []
for start in range(0, len(book_text), chunk_size):
    chunks.append(book_text[start:start + chunk_size])

summaries = []
for number, piece in enumerate(chunks[:10], start=1):
    summaries.append(summarize_chunk(piece, number))

# Stitch the per-chunk summaries together and preview the beginning
full_summary = "\n\n".join(summaries)
print(full_summary[:500])

Convert summary to audio

python

import pathlib

def _split_for_tts(text: str, max_len: int) -> list:
    """Split *text* into non-empty pieces of at most *max_len* characters, breaking at whitespace when possible."""
    segments = []
    start = 0
    while start < len(text):
        end = min(start + max_len, len(text))
        if end < len(text):
            # Back up to the last space/newline in the window so a word is
            # not cut in half; fall back to a hard cut if there is none
            cut = max(text.rfind(" ", start, end), text.rfind("\n", start, end))
            if cut > start:
                end = cut + 1
        segment = text[start:end].strip()
        if segment:  # skip whitespace-only pieces rather than send blank input
            segments.append(segment)
        start = end
    return segments


def text_to_audio(text: str, output_path: str, voice: str = "nova"):
    """Convert *text* to speech and write the result to *output_path* as MP3 data.

    The text is sent to the TTS endpoint in segments of at most 3000
    characters, split at whitespace so words are not broken across
    requests. The returned audio bytes are concatenated in order and
    written as a single file. If *text* contains no non-whitespace
    characters, no request is made and an empty file is written.

    Args:
        text: The text to narrate.
        output_path: Destination file path for the audio.
        voice: Voice name passed through to the TTS API.
    """
    # Keep each request below the TTS input limit (the original note cites
    # 4096 — NOTE(review): confirm the exact limit and unit for this endpoint)
    max_len = 3000

    audio_parts = []
    for segment in _split_for_tts(text, max_len):
        response = client.audio.speech.create(
            model="tts-1-hd",
            voice=voice,
            input=segment,
            response_format="mp3"
        )
        audio_parts.append(response.content)

    # Join once instead of repeatedly growing a bytes object
    pathlib.Path(output_path).write_bytes(b"".join(audio_parts))
    print(f"Audio saved to {output_path}")

# Narrate the combined summary with the "nova" voice and save it to book_summary.mp3
text_to_audio(full_summary, "book_summary.mp3", voice="nova")

Available voices: alloy, echo, fable, onyx, nova, shimmer. Use tts-1-hd for the highest quality audio output.