""" Doc-Q&A app (Gradio 5.x + Llama-Index 0.12.x, June 2025) Key upgrades ------------ ▪ Gradio 5.34—new event system (`upload`, `clear` etc.) ▪ Llama-Index 0.12.42—`VectorStoreIndex.from_documents` signature unchanged ▪ MixedbreadAIEmbedding 0.3.0 ➜ supports `batch_size`, `timeout` ▪ Tenacity for exponential-back-off when MXBAI returns 5xx / rate limits """ from __future__ import annotations import os from pathlib import Path from typing import List import gradio as gr from tenacity import retry, wait_exponential, stop_after_attempt from mixedbread_ai.core.api_error import ApiError from llama_index.core import SimpleDirectoryReader, VectorStoreIndex from llama_index.embeddings.mixedbreadai import MixedbreadAIEmbedding from llama_index.llms.groq import Groq from llama_parse import LlamaParse # ────────────────────────────────────────────────────────────────── # 1. Environment variables (fail-fast if missing) # ────────────────────────────────────────────────────────────────── LLAMA_CLOUD_API_KEY = os.getenv("LLAMA_CLOUD_API_KEY") GROQ_API_KEY = os.getenv("GROQ_API_KEY") MXBAI_API_KEY = os.getenv("MXBAI_API_KEY") if not all([LLAMA_CLOUD_API_KEY, GROQ_API_KEY, MXBAI_API_KEY]): raise EnvironmentError( "LLAMA_CLOUD_API_KEY, GROQ_API_KEY and MXBAI_API_KEY must be set in the env." ) # ────────────────────────────────────────────────────────────────── # 2. Models & parsers (latest defaults - June 2025) # ────────────────────────────────────────────────────────────────── LLM_MODEL = "llama-3.1-70b-versatile" # Groq’s best for Q&A EMBED_MODEL = "mixedbread-ai/mxbai-embed-large-v1" # 1024-dim parser = LlamaParse(api_key=LLAMA_CLOUD_API_KEY, result_type="markdown") SUPPORTED_EXTS = ( ".pdf", ".docx", ".doc", ".txt", ".csv", ".xlsx", ".pptx", ".html", ".jpg", ".jpeg", ".png", ".webp", ".svg", ) file_extractor = {ext: parser for ext in SUPPORTED_EXTS} embed_model = MixedbreadAIEmbedding( api_key = MXBAI_API_KEY, model_name = EMBED_MODEL, batch_size = 8, # keep requests < 100 KB timeout = 60, # generous server-side processing window ) llm = Groq(model=LLM_MODEL, api_key=GROQ_API_KEY) # A simple global cache (could be swapped for Redis, etc.) vector_index: VectorStoreIndex | None = None # ────────────────────────────────────────────────────────────────── # 3. Helper wrappers # ────────────────────────────────────────────────────────────────── @retry( wait=wait_exponential(multiplier=2, min=4, max=32), stop=stop_after_attempt(4), retry_error_callback=lambda retry_state: None, # bubble up as None reraise=False, ) def _safe_build_index(docs) -> VectorStoreIndex | None: """Retry MXBAI 503 / 429 transparently.""" try: return VectorStoreIndex.from_documents(docs, embed_model=embed_model) except ApiError as e: # Tenacity will catch and retry unless non-5xx / non-429 if e.status_code not in (429, 500, 502, 503, 504): raise raise # trigger retry def load_files(file: Path | None) -> str: """Parse uploaded file and build vector index (with retries).""" global vector_index if file is None: return "⚠️ No file selected." if file.suffix.lower() not in SUPPORTED_EXTS: allow = ", ".join(SUPPORTED_EXTS) return f"⚠️ Unsupported file type. Allowed: {allow}" docs = SimpleDirectoryReader( input_files=[str(file)], file_extractor=file_extractor, ).load_data() idx = _safe_build_index(docs) if idx is None: return "🚧 Embedding service busy. Please retry in ~1 minute." vector_index = idx return f"✅ Parsed **{file.name}** — you can start chatting!" 
def respond(message: str, history: List[List[str]]):
    """Stream answer chunks to the Chatbot."""
    if vector_index is None:
        yield "➡️ Please upload a document first."
        return

    query_engine = vector_index.as_query_engine(streaming=True, llm=llm)
    response = query_engine.query(message)

    partial = ""
    for token in response.response_gen:
        partial += token
        yield partial


def clear():
    """Reset everything (file widget + status + index)."""
    global vector_index
    vector_index = None
    return None, "", None  # file_input, status_md, chatbot history


# ──────────────────────────────────────────────────────────────────
# 4. Gradio UI (5.x syntax)
# ──────────────────────────────────────────────────────────────────
with gr.Blocks(
    theme=gr.themes.Default(
        primary_hue="green",
        secondary_hue="blue",
        font=[gr.themes.GoogleFont("Poppins")],
    ),
    css="footer {visibility:hidden}",
) as demo:
    gr.Markdown("