""" | |
Doc-Q&A app (Gradio 5.x + Llama-Index 0.12.x, June 2025) | |
Key upgrades | |
------------ | |
βͺ Gradio 5.34βnew event system (`upload`, `clear` etc.) | |
βͺ Llama-Index 0.12.42β`VectorStoreIndex.from_documents` signature unchanged | |
βͺ MixedbreadAIEmbedding 0.3.0 β supports `batch_size`, `timeout` | |
βͺ Tenacity for exponential-back-off when MXBAI returns 5xx / rate limits | |
""" | |
from __future__ import annotations

import os
from pathlib import Path
from typing import List

import gradio as gr
from tenacity import (
    retry,
    retry_if_exception,
    stop_after_attempt,
    wait_exponential,
)
from mixedbread_ai.core.api_error import ApiError

from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.embeddings.mixedbreadai import MixedbreadAIEmbedding
from llama_index.llms.groq import Groq
from llama_parse import LlamaParse
# ──────────────────────────────────────────────────────────────────
# 1. Environment variables (fail-fast if missing)
# ──────────────────────────────────────────────────────────────────
LLAMA_CLOUD_API_KEY = os.getenv("LLAMA_CLOUD_API_KEY")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
MXBAI_API_KEY = os.getenv("MXBAI_API_KEY")

if not all([LLAMA_CLOUD_API_KEY, GROQ_API_KEY, MXBAI_API_KEY]):
    raise EnvironmentError(
        "LLAMA_CLOUD_API_KEY, GROQ_API_KEY and MXBAI_API_KEY must be set in the env."
    )
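
# If you keep keys in a local .env file, python-dotenv (an optional extra,
# not a dependency of this app) can populate os.environ. A minimal sketch;
# call it *before* the fail-fast check above:
#
#   from dotenv import load_dotenv
#   load_dotenv()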
# ──────────────────────────────────────────────────────────────────
# 2. Models & parsers (latest defaults - June 2025)
# ──────────────────────────────────────────────────────────────────
LLM_MODEL = "llama-3.3-70b-versatile"  # Groq-hosted 70B generalist; replaces
                                       # the retired llama-3.1-70b-versatile
EMBED_MODEL = "mixedbread-ai/mxbai-embed-large-v1"  # 1024-dim

parser = LlamaParse(api_key=LLAMA_CLOUD_API_KEY, result_type="markdown")

SUPPORTED_EXTS = (
    ".pdf", ".docx", ".doc", ".txt", ".csv", ".xlsx",
    ".pptx", ".html", ".jpg", ".jpeg", ".png", ".webp", ".svg",
)
file_extractor = {ext: parser for ext in SUPPORTED_EXTS}
embed_model = MixedbreadAIEmbedding(
    api_key=MXBAI_API_KEY,
    model_name=EMBED_MODEL,
    batch_size=8,  # keep requests < 100 KB
    timeout=60,    # generous server-side processing window
)
llm = Groq(model=LLM_MODEL, api_key=GROQ_API_KEY)
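
# Optional smoke test (a hedged sketch; uncomment to validate credentials once
# at startup). `get_text_embedding` and `complete` are the standard Llama-Index
# calls here; 1024 matches mxbai-embed-large-v1's advertised dimension.
#
#   assert len(embed_model.get_text_embedding("ping")) == 1024
#   print(llm.complete("Reply with one word: ready?"))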
# A simple global cache (could be swapped for Redis, etc.)
vector_index: VectorStoreIndex | None = None
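
# If the index should survive restarts, Llama-Index can persist it to disk
# instead of this in-process global; a minimal sketch (the ./storage path is
# illustrative):
#
#   vector_index.storage_context.persist(persist_dir="./storage")
#   from llama_index.core import StorageContext, load_index_from_storage
#   vector_index = load_index_from_storage(
#       StorageContext.from_defaults(persist_dir="./storage")
#   )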
# ──────────────────────────────────────────────────────────────────
# 3. Helper wrappers
# ──────────────────────────────────────────────────────────────────
def _is_transient(exc: BaseException) -> bool:
    """True for MXBAI errors worth retrying (rate limits / server errors)."""
    return isinstance(exc, ApiError) and exc.status_code in (429, 500, 502, 503, 504)


@retry(
    retry=retry_if_exception(_is_transient),
    wait=wait_exponential(multiplier=1, min=2, max=30),
    stop=stop_after_attempt(5),
    retry_error_callback=lambda _state: None,  # retries exhausted -> give up with None
)
def _safe_build_index(docs) -> VectorStoreIndex | None:
    """Build the index, retrying MXBAI 5xx / 429 transparently via Tenacity."""
    return VectorStoreIndex.from_documents(docs, embed_model=embed_model)
def load_files(file: str | None) -> str:
    """Parse the uploaded file and build the vector index (with retries)."""
    global vector_index
    if file is None:
        return "⚠️ No file selected."
    path = Path(file)  # gr.File(type="filepath") hands us a plain string
    if path.suffix.lower() not in SUPPORTED_EXTS:
        allow = ", ".join(SUPPORTED_EXTS)
        return f"⚠️ Unsupported file type. Allowed: {allow}"
    docs = SimpleDirectoryReader(
        input_files=[str(path)],
        file_extractor=file_extractor,
    ).load_data()
    idx = _safe_build_index(docs)
    if idx is None:
        return "🔧 Embedding service busy. Please retry in ~1 minute."
    vector_index = idx
    return f"✅ Parsed **{path.name}**. You can start chatting!"
def respond(message: str, history: List[List[str]]):
    """Stream answer chunks to the Chatbot as a growing [user, bot] history."""
    if vector_index is None:
        yield history + [[message, "➡️ Please upload a document first."]]
        return
    query_engine = vector_index.as_query_engine(streaming=True, llm=llm)
    response = query_engine.query(message)
    partial = ""
    for token in response.response_gen:
        partial += token
        yield history + [[message, partial]]
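
# For quick debugging outside the UI, a non-streaming query works too; a
# minimal sketch (assumes an index was already built by load_files):
#
#   engine = vector_index.as_query_engine(llm=llm)
#   print(engine.query("What is this document about?"))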
def clear():
    """Reset everything (file widget + status + index)."""
    global vector_index
    vector_index = None
    return None, "", None  # file_input, status_md, chatbot history
# ──────────────────────────────────────────────────────────────────
# 4. Gradio UI (5.x syntax)
# ──────────────────────────────────────────────────────────────────
with gr.Blocks(
    theme=gr.themes.Default(
        primary_hue="green",
        secondary_hue="blue",
        font=[gr.themes.GoogleFont("Poppins")],
    ),
    css="footer {visibility:hidden}",
) as demo:
    gr.Markdown("<h1 style='text-align:center'>DataCamp Doc Q&A 🤖📄</h1>")
    with gr.Row():
        with gr.Column(scale=1):
            file_input = gr.File(
                label="Upload document",
                file_count="single",
                type="filepath",
                show_label=True,
            )
            status_md = gr.Markdown()
            with gr.Row():
                clear_btn = gr.Button("Reset 🔄", variant="secondary")
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(height=340)
            txt_box = gr.Textbox(
                placeholder="Ask something about the uploaded document…",
                container=False,
                scale=7,
            )
            send_btn = gr.Button("Send", variant="primary")
    # events (v5 style)
    file_input.upload(
        fn=load_files,
        inputs=file_input,
        outputs=status_md,
    )
    send_btn.click(
        fn=respond,
        inputs=[txt_box, chatbot],
        outputs=chatbot,
    )
    clear_btn.click(
        fn=clear,
        outputs=[file_input, status_md, chatbot],
    )
# optional: disable public OpenAPI schema (old crash guard)
demo.queue(api_open=False)
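
# Queue tuning is optional; a hedged sketch if the app sees real traffic
# (max_size / default_concurrency_limit are Blocks.queue parameters):
#
#   demo.queue(api_open=False, max_size=32, default_concurrency_limit=4)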
# ──────────────────────────────────────────────────────────────────
# 5. Launch
# ──────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    demo.launch(share=True, server_name="0.0.0.0", server_port=7860)
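
# Typical local run (illustrative; the filename and key values are
# placeholders):
#
#   export LLAMA_CLOUD_API_KEY=... GROQ_API_KEY=... MXBAI_API_KEY=...
#   python app.py
#
# `share=True` additionally requests a temporary public gradio.live URL.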