File size: 7,568 Bytes
f3913f7
 
 
 
 
 
 
 
 
 
 
 
 
5ca6388
f3913f7
 
 
5ca6388
f3913f7
 
 
5ca6388
 
 
 
 
f3913f7
 
 
 
 
 
e4e6dc4
f3913f7
e4e6dc4
f3913f7
5ca6388
 
f3913f7
 
 
 
 
 
 
e4e6dc4
f3913f7
e4e6dc4
 
f3913f7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e4e6dc4
f3913f7
e4e6dc4
5ca6388
f3913f7
 
 
5ca6388
e4e6dc4
f3913f7
 
e4e6dc4
5ca6388
f3913f7
 
 
 
 
 
5ca6388
 
f3913f7
 
e4e6dc4
f3913f7
 
e4e6dc4
f3913f7
 
e4e6dc4
 
f3913f7
 
 
5ca6388
 
f3913f7
 
2987ac4
 
f3913f7
2987ac4
 
f3913f7
 
 
5ca6388
 
 
 
f3913f7
5ca6388
f3913f7
5ca6388
e4e6dc4
f3913f7
 
5ca6388
 
f3913f7
 
 
 
 
 
 
5ca6388
f3913f7
e4e6dc4
5ca6388
f3913f7
 
 
 
 
5ca6388
f3913f7
5ca6388
f3913f7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e4e6dc4
f3913f7
e4e6dc4
5ca6388
f3913f7
e4e6dc4
f3913f7
5ca6388
e4e6dc4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
"""
Doc-Q&A app (Gradio 5.x + Llama-Index 0.12.x, June 2025)

Key upgrades
------------
β–ͺ Gradio 5.34β€”new event system (`upload`, `clear` etc.)  
β–ͺ Llama-Index 0.12.42β€”`VectorStoreIndex.from_documents` signature unchanged  
β–ͺ MixedbreadAIEmbedding 0.3.0  ➜ supports `batch_size`, `timeout`  
β–ͺ Tenacity for exponential-back-off when MXBAI returns 5xx / rate limits
"""

from __future__ import annotations

import os
from pathlib import Path
from typing import List

import gradio as gr
from mixedbread_ai.core.api_error import ApiError
from tenacity import retry, retry_if_exception, stop_after_attempt, wait_exponential

from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.embeddings.mixedbreadai import MixedbreadAIEmbedding
from llama_index.llms.groq import Groq
from llama_parse import LlamaParse

# ──────────────────────────────────────────────────────────────────
# 1.  Environment variables (fail-fast if missing)
# ──────────────────────────────────────────────────────────────────
# All three services are mandatory; aborting at import time beats a
# confusing mid-request failure later.
LLAMA_CLOUD_API_KEY = os.getenv("LLAMA_CLOUD_API_KEY")
GROQ_API_KEY        = os.getenv("GROQ_API_KEY")
MXBAI_API_KEY       = os.getenv("MXBAI_API_KEY")

if not all([LLAMA_CLOUD_API_KEY, GROQ_API_KEY, MXBAI_API_KEY]):
    raise EnvironmentError(
        "LLAMA_CLOUD_API_KEY, GROQ_API_KEY and MXBAI_API_KEY must be set in the env."
    )

# ──────────────────────────────────────────────────────────────────
# 2.  Models & parsers  (latest defaults - June 2025)
# ──────────────────────────────────────────────────────────────────
LLM_MODEL   = "llama-3.1-70b-versatile"                 # Groq's best for Q&A
EMBED_MODEL = "mixedbread-ai/mxbai-embed-large-v1"      # 1024-dim

# LlamaParse converts every supported upload to markdown for the reader.
parser = LlamaParse(api_key=LLAMA_CLOUD_API_KEY, result_type="markdown")

# Extensions routed through LlamaParse; anything else is rejected in
# load_files() before parsing.
SUPPORTED_EXTS = (
    ".pdf", ".docx", ".doc", ".txt", ".csv", ".xlsx",
    ".pptx", ".html", ".jpg", ".jpeg", ".png", ".webp", ".svg",
)
file_extractor = {ext: parser for ext in SUPPORTED_EXTS}

embed_model = MixedbreadAIEmbedding(
    api_key     = MXBAI_API_KEY,
    model_name  = EMBED_MODEL,
    batch_size  = 8,          # keep requests < 100 KB
    timeout     = 60,         # generous server-side processing window
)

llm = Groq(model=LLM_MODEL, api_key=GROQ_API_KEY)

# A simple global cache (could be swapped for Redis, etc.)
# None until the first successful upload; respond() checks this.
vector_index: VectorStoreIndex | None = None


# ──────────────────────────────────────────────────────────────────
# 3.  Helper wrappers
# ──────────────────────────────────────────────────────────────────
# Status codes worth retrying: rate limiting plus transient server errors.
_RETRYABLE_STATUS = (429, 500, 502, 503, 504)


def _is_transient_mxbai_error(exc: BaseException) -> bool:
    """Return True when *exc* is an MXBAI ApiError that merits a retry."""
    return isinstance(exc, ApiError) and exc.status_code in _RETRYABLE_STATUS


@retry(
    wait=wait_exponential(multiplier=2, min=4, max=32),
    stop=stop_after_attempt(4),
    # Only transient MXBAI failures are retried; any other exception
    # propagates to the caller immediately.  (The previous version had no
    # `retry=` predicate, so tenacity retried *every* exception, and the
    # status-code check in the except block was dead code because both
    # branches re-raised identically.)
    retry=retry_if_exception(_is_transient_mxbai_error),
    retry_error_callback=lambda retry_state: None,  # exhausted retries -> None
)
def _safe_build_index(docs) -> VectorStoreIndex | None:
    """Build a vector index, retrying MXBAI 5xx / 429 transparently.

    Returns the index on success, or ``None`` after the final failed
    attempt so the caller can show a friendly "service busy" message.
    Non-transient errors raise straight through.
    """
    return VectorStoreIndex.from_documents(docs, embed_model=embed_model)


def load_files(file: Path | None) -> str:
    """Parse uploaded file and build vector index (with retries)."""
    global vector_index
    if file is None:
        return "⚠️  No file selected."

    if file.suffix.lower() not in SUPPORTED_EXTS:
        allow = ", ".join(SUPPORTED_EXTS)
        return f"⚠️  Unsupported file type. Allowed: {allow}"

    docs = SimpleDirectoryReader(
        input_files=[str(file)],
        file_extractor=file_extractor,
    ).load_data()

    idx = _safe_build_index(docs)
    if idx is None:
        return "🚧 Embedding service busy. Please retry in ~1 minute."

    vector_index = idx
    return f"βœ… Parsed **{file.name}** β€” you can start chatting!"


def respond(message: str, history: List[List[str]]):
    """Stream the answer into the Chatbot.

    A Gradio ``Chatbot`` output expects the *full* history as a list of
    ``[user, assistant]`` pairs — yielding a bare string (as the previous
    version did) renders nothing.  Each yield therefore re-emits the prior
    history plus the growing answer for the current message.
    """
    history = history or []  # Gradio may pass None on the first turn

    if vector_index is None:
        yield history + [[message, "➡️ Please upload a document first."]]
        return

    query_engine = vector_index.as_query_engine(streaming=True, llm=llm)
    response = query_engine.query(message)

    partial = ""
    for token in response.response_gen:
        partial += token
        yield history + [[message, partial]]


def clear():
    """Drop the cached index and blank every widget wired to the Reset button."""
    global vector_index
    vector_index = None
    # Tuple order mirrors the click() outputs: file_input, status_md, chatbot.
    return (None, "", None)


# ──────────────────────────────────────────────────────────────────
# 4.  Gradio UI  (5.x syntax)
# ──────────────────────────────────────────────────────────────────
# Layout: left column = upload + status + reset; right column = chat.
with gr.Blocks(
    theme=gr.themes.Default(
        primary_hue="green",
        secondary_hue="blue",
        font=[gr.themes.GoogleFont("Poppins")],
    ),
    css="footer {visibility:hidden}",  # hide the default Gradio footer
) as demo:

    gr.Markdown("<h1 style='text-align:center'>DataCamp Doc Q&A πŸ€–πŸ“‘</h1>")

    with gr.Row():
        with gr.Column(scale=1):
            # type="filepath" delivers a str path to the upload callback.
            file_input = gr.File(
                label="Upload document",
                file_count="single",
                type="filepath",
                show_label=True,
            )
            status_md  = gr.Markdown()  # parse/index progress messages
            with gr.Row():
                clear_btn = gr.Button("Reset πŸ”„", variant="secondary")

        with gr.Column(scale=3):
            chatbot = gr.Chatbot(height=340)
            txt_box = gr.Textbox(
                placeholder="Ask something about the uploaded document…",
                container=False,
                scale=7,
            )
            send_btn = gr.Button("Send", variant="primary")

    # events (v5 style)
    # upload -> parse & index; click -> stream answer; reset -> wipe state.
    file_input.upload(
        fn=load_files,
        inputs=file_input,
        outputs=status_md,
    )
    send_btn.click(
        fn=respond,
        inputs=[txt_box, chatbot],
        outputs=chatbot,
    )
    clear_btn.click(
        fn=clear,
        outputs=[file_input, status_md, chatbot],
    )

# optional: disable public OpenAPI schema (old crash guard)
# queue() also enables streaming generators like respond() to yield.
demo.queue(api_open=False)

# ──────────────────────────────────────────────────────────────────
# 5.  Launch
# ──────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # share=True publishes a temporary public URL; 0.0.0.0 binds all
    # interfaces (e.g. inside a container).
    demo.launch(share=True, server_name="0.0.0.0", server_port=7860)