"""
Doc-Q&A app (Gradio 5.x + Llama-Index 0.12.x, June 2025)
Key upgrades
------------
βͺ Gradio 5.34βnew event system (`upload`, `clear` etc.)
βͺ Llama-Index 0.12.42β`VectorStoreIndex.from_documents` signature unchanged
βͺ MixedbreadAIEmbedding 0.3.0 β supports `batch_size`, `timeout`
βͺ Tenacity for exponential-back-off when MXBAI returns 5xx / rate limits
"""
from __future__ import annotations

import os
from pathlib import Path
from typing import List

import gradio as gr
from tenacity import (
    retry,
    retry_if_exception_type,
    stop_after_attempt,
    wait_exponential,
)

from mixedbread_ai.core.api_error import ApiError
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.embeddings.mixedbreadai import MixedbreadAIEmbedding
from llama_index.llms.groq import Groq
from llama_parse import LlamaParse
# ------------------------------------------------------------------
# 1. Environment variables (fail-fast if missing)
# ------------------------------------------------------------------
LLAMA_CLOUD_API_KEY = os.getenv("LLAMA_CLOUD_API_KEY")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
MXBAI_API_KEY = os.getenv("MXBAI_API_KEY")
# Fail at import time with one clear message rather than a confusing
# downstream auth error on first request.
if not all([LLAMA_CLOUD_API_KEY, GROQ_API_KEY, MXBAI_API_KEY]):
    raise EnvironmentError(
        "LLAMA_CLOUD_API_KEY, GROQ_API_KEY and MXBAI_API_KEY must be set in the env."
    )
# ------------------------------------------------------------------
# 2. Models & parsers (latest defaults - June 2025)
# ------------------------------------------------------------------
LLM_MODEL = "llama-3.1-70b-versatile"  # Groq's pick for Q&A (per author)
EMBED_MODEL = "mixedbread-ai/mxbai-embed-large-v1"  # 1024-dim embeddings
# LlamaParse converts uploaded documents to Markdown before indexing.
parser = LlamaParse(api_key=LLAMA_CLOUD_API_KEY, result_type="markdown")
SUPPORTED_EXTS = (
    ".pdf", ".docx", ".doc", ".txt", ".csv", ".xlsx",
    ".pptx", ".html", ".jpg", ".jpeg", ".png", ".webp", ".svg",
)
# Route every supported extension through the same LlamaParse parser.
file_extractor = {ext: parser for ext in SUPPORTED_EXTS}
embed_model = MixedbreadAIEmbedding(
    api_key = MXBAI_API_KEY,
    model_name = EMBED_MODEL,
    batch_size = 8,  # keep requests < 100 KB
    timeout = 60,  # generous server-side processing window
)
llm = Groq(model=LLM_MODEL, api_key=GROQ_API_KEY)
# A simple global cache (could be swapped for Redis, etc.)
# NOTE(review): module-level mutable state — a single index is shared by
# every concurrent user of the app; confirm that is intended.
vector_index: VectorStoreIndex | None = None
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
# 3. Helper wrappers
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
class _TransientEmbedError(RuntimeError):
    """Internal marker: MXBAI returned a retryable status (429 / 5xx)."""


@retry(
    wait=wait_exponential(multiplier=2, min=4, max=32),
    stop=stop_after_attempt(4),
    # BUGFIX: without a `retry=` predicate, tenacity retries EVERY
    # exception — including permanent errors (401 bad key, 400 bad
    # request) — and then swallows them into None via the callback.
    # Now only transient failures are retried; anything else propagates
    # to the caller immediately.
    retry=retry_if_exception_type(_TransientEmbedError),
    retry_error_callback=lambda retry_state: None,  # retries exhausted -> None
)
def _safe_build_index(docs) -> VectorStoreIndex | None:
    """Build a vector index over *docs*, retrying MXBAI 5xx / 429 transparently.

    Returns
    -------
    VectorStoreIndex | None
        The built index, or ``None`` when all retry attempts were
        exhausted — the caller shows a "service busy" message instead
        of crashing.

    Raises
    ------
    ApiError
        For non-transient API failures (anything outside 429/5xx).
    """
    try:
        return VectorStoreIndex.from_documents(docs, embed_model=embed_model)
    except ApiError as e:
        if e.status_code in (429, 500, 502, 503, 504):
            # Wrap so the @retry predicate can tell transient failures
            # apart from permanent ones.
            raise _TransientEmbedError(str(e)) from e
        raise  # permanent error: bubble up unretried
def load_files(file: Path | None) -> str:
"""Parse uploaded file and build vector index (with retries)."""
global vector_index
if file is None:
return "β οΈ No file selected."
if file.suffix.lower() not in SUPPORTED_EXTS:
allow = ", ".join(SUPPORTED_EXTS)
return f"β οΈ Unsupported file type. Allowed: {allow}"
docs = SimpleDirectoryReader(
input_files=[str(file)],
file_extractor=file_extractor,
).load_data()
idx = _safe_build_index(docs)
if idx is None:
return "π§ Embedding service busy. Please retry in ~1 minute."
vector_index = idx
return f"β
Parsed **{file.name}** β you can start chatting!"
def respond(message: str, history: List[List[str]]):
    """Stream the answer to *message*, yielding cumulative text chunks.

    Yields progressively longer strings so the Chatbot renders a live
    "typing" effect. If no document has been indexed yet, yields a single
    hint and stops. *history* is required by Gradio's event signature but
    is not consulted — each query runs against the index alone.
    """
    if vector_index is None:
        # BUGFIX: restored mis-encoded (mojibake) arrow in the user-facing
        # message.
        yield "➡️ Please upload a document first."
        return
    query_engine = vector_index.as_query_engine(streaming=True, llm=llm)
    response = query_engine.query(message)
    partial = ""
    for token in response.response_gen:
        partial += token
        yield partial  # cumulative text, not a delta
def clear():
    """Reset all app state: drop the cached index and blank the widgets."""
    global vector_index
    vector_index = None
    # One value per wired output component: (file_input, status_md, chatbot).
    reset_values = (None, "", None)
    return reset_values
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
# 4. Gradio UI (5.x syntax)
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
with gr.Blocks(
    theme=gr.themes.Default(
        primary_hue="green",
        secondary_hue="blue",
        font=[gr.themes.GoogleFont("Poppins")],
    ),
    css="footer {visibility:hidden}",  # hide the default Gradio footer
) as demo:
    gr.Markdown("<h1 style='text-align:center'>DataCamp Doc Q&A π€π</h1>")
    with gr.Row():
        # Left column: upload widget + status text + reset button.
        with gr.Column(scale=1):
            file_input = gr.File(
                label="Upload document",
                file_count="single",
                type="filepath",  # handler receives a filesystem path string
                show_label=True,
            )
            status_md = gr.Markdown()
            with gr.Row():
                clear_btn = gr.Button("Reset π", variant="secondary")
        # Right column: chat area.
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(height=340)
            txt_box = gr.Textbox(
                placeholder="Ask something about the uploaded documentβ¦",
                container=False,
                scale=7,
            )
            send_btn = gr.Button("Send", variant="primary")
    # events (v5 style)
    # Parsing + indexing happens on upload; status_md shows the outcome.
    file_input.upload(
        fn=load_files,
        inputs=file_input,
        outputs=status_md,
    )
    # respond() is a generator, so the chatbot streams partial answers.
    send_btn.click(
        fn=respond,
        inputs=[txt_box, chatbot],
        outputs=chatbot,
    )
    # clear() returns one value per output listed here.
    clear_btn.click(
        fn=clear,
        outputs=[file_input, status_md, chatbot],
    )
# optional: disable public OpenAPI schema (old crash guard)
demo.queue(api_open=False)
# ------------------------------------------------------------------
# 5. Launch
# ------------------------------------------------------------------
if __name__ == "__main__":
    # share=True publishes a temporary public Gradio link; binding to
    # 0.0.0.0 exposes the server on all interfaces (intended for
    # container/Space hosting — confirm before running locally).
    demo.launch(share=True, server_name="0.0.0.0", server_port=7860)