"""
Doc-Q&A app (Gradio 5.x + Llama-Index 0.12.x, June 2025)
Key upgrades
------------
β–ͺ Gradio 5.34β€”new event system (`upload`, `clear` etc.)
β–ͺ Llama-Index 0.12.42β€”`VectorStoreIndex.from_documents` signature unchanged
β–ͺ MixedbreadAIEmbedding 0.3.0 ➜ supports `batch_size`, `timeout`
β–ͺ Tenacity for exponential-back-off when MXBAI returns 5xx / rate limits
"""
from __future__ import annotations
import os
from pathlib import Path
from typing import List
import gradio as gr
from tenacity import retry, retry_if_exception, stop_after_attempt, wait_exponential
from mixedbread_ai.core.api_error import ApiError
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.embeddings.mixedbreadai import MixedbreadAIEmbedding
from llama_index.llms.groq import Groq
from llama_parse import LlamaParse
# ──────────────────────────────────────────────────────────────────
# 1. Environment variables (fail-fast if missing)
# ──────────────────────────────────────────────────────────────────
LLAMA_CLOUD_API_KEY = os.getenv("LLAMA_CLOUD_API_KEY")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
MXBAI_API_KEY = os.getenv("MXBAI_API_KEY")
if not all([LLAMA_CLOUD_API_KEY, GROQ_API_KEY, MXBAI_API_KEY]):
raise EnvironmentError(
"LLAMA_CLOUD_API_KEY, GROQ_API_KEY and MXBAI_API_KEY must be set in the env."
)
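# Local-run sketch (values are placeholders, not real keys):
#   export LLAMA_CLOUD_API_KEY=...
#   export GROQ_API_KEY=...
#   export MXBAI_API_KEY=...
#   python app.py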
# ──────────────────────────────────────────────────────────────────
# 2. Models & parsers (latest defaults - June 2025)
# ──────────────────────────────────────────────────────────────────
LLM_MODEL = "llama-3.1-70b-versatile"  # Groq's best for Q&A
EMBED_MODEL = "mixedbread-ai/mxbai-embed-large-v1" # 1024-dim
parser = LlamaParse(api_key=LLAMA_CLOUD_API_KEY, result_type="markdown")
SUPPORTED_EXTS = (
".pdf", ".docx", ".doc", ".txt", ".csv", ".xlsx",
".pptx", ".html", ".jpg", ".jpeg", ".png", ".webp", ".svg",
)
file_extractor = {ext: parser for ext in SUPPORTED_EXTS}
embed_model = MixedbreadAIEmbedding(
    api_key=MXBAI_API_KEY,
    model_name=EMBED_MODEL,
    batch_size=8,  # keep requests < 100 KB
    timeout=60,    # generous server-side processing window
)
llm = Groq(model=LLM_MODEL, api_key=GROQ_API_KEY)
# A simple global cache (could be swapped for Redis, etc.)
vector_index: VectorStoreIndex | None = None
# ──────────────────────────────────────────────────────────────────
# 3. Helper wrappers
# ──────────────────────────────────────────────────────────────────
def _is_transient(exc: BaseException) -> bool:
    """Retry only on MXBAI rate limits and transient server errors."""
    return isinstance(exc, ApiError) and exc.status_code in (429, 500, 502, 503, 504)

@retry(
    wait=wait_exponential(multiplier=2, min=4, max=32),
    stop=stop_after_attempt(4),
    retry=retry_if_exception(_is_transient),        # other errors bubble up immediately
    retry_error_callback=lambda retry_state: None,  # retries exhausted -> return None
)
def _safe_build_index(docs) -> VectorStoreIndex | None:
    """Build the vector index, retrying MXBAI 429 / 5xx transparently."""
    return VectorStoreIndex.from_documents(docs, embed_model=embed_model)
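# Illustrative use of the wrapper (sketch; `docs` comes from SimpleDirectoryReader):
#   idx = _safe_build_index(docs)
#   if idx is None:   # all 4 attempts hit 429/5xx
#       ...           # surface a "service busy" message instead of crashing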
def load_files(file: str | None) -> str:
    """Parse the uploaded file and build the vector index (with retries)."""
    global vector_index
    if file is None:
        return "⚠️ No file selected."
    path = Path(file)  # gr.File(type="filepath") passes a plain string path
    if path.suffix.lower() not in SUPPORTED_EXTS:
        allowed = ", ".join(SUPPORTED_EXTS)
        return f"⚠️ Unsupported file type. Allowed: {allowed}"
    docs = SimpleDirectoryReader(
        input_files=[str(path)],
        file_extractor=file_extractor,
    ).load_data()
    idx = _safe_build_index(docs)
    if idx is None:
        return "🚧 Embedding service busy. Please retry in ~1 minute."
    vector_index = idx
    return f"✅ Parsed **{path.name}**. You can start chatting!"
def respond(message: str, history: List[List[str]]):
    """Stream answer chunks to the Chatbot, yielding the full updated history."""
    if vector_index is None:
        yield history + [[message, "➡️ Please upload a document first."]]
        return
    query_engine = vector_index.as_query_engine(streaming=True, llm=llm)
    response = query_engine.query(message)
    partial = ""
    for token in response.response_gen:
        partial += token
        yield history + [[message, partial]]
def clear():
"""Reset everything (file widget + status + index)."""
global vector_index
vector_index = None
return None, "", None # file_input, status_md, chatbot history
# ──────────────────────────────────────────────────────────────────
# 4. Gradio UI (5.x syntax)
# ──────────────────────────────────────────────────────────────────
with gr.Blocks(
theme=gr.themes.Default(
primary_hue="green",
secondary_hue="blue",
font=[gr.themes.GoogleFont("Poppins")],
),
css="footer {visibility:hidden}",
) as demo:
    gr.Markdown("<h1 style='text-align:center'>DataCamp Doc Q&A 🤖📑</h1>")
with gr.Row():
with gr.Column(scale=1):
file_input = gr.File(
label="Upload document",
file_count="single",
type="filepath",
show_label=True,
)
status_md = gr.Markdown()
with gr.Row():
                clear_btn = gr.Button("Reset 🔄", variant="secondary")
with gr.Column(scale=3):
chatbot = gr.Chatbot(height=340)
txt_box = gr.Textbox(
                placeholder="Ask something about the uploaded document…",
container=False,
scale=7,
)
send_btn = gr.Button("Send", variant="primary")
# events (v5 style)
file_input.upload(
fn=load_files,
inputs=file_input,
outputs=status_md,
)
send_btn.click(
fn=respond,
inputs=[txt_box, chatbot],
outputs=chatbot,
)
clear_btn.click(
fn=clear,
outputs=[file_input, status_md, chatbot],
)
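    # Optional sketch, not part of the original wiring: let pressing Enter in
    # the textbox trigger the same streaming handler as the Send button.
    txt_box.submit(
        fn=respond,
        inputs=[txt_box, chatbot],
        outputs=chatbot,
    )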
# optional: disable public OpenAPI schema (old crash guard)
demo.queue(api_open=False)
# ──────────────────────────────────────────────────────────────────
# 5. Launch
# ──────────────────────────────────────────────────────────────────
if __name__ == "__main__":
demo.launch(share=True, server_name="0.0.0.0", server_port=7860)