File size: 7,568 Bytes
f3913f7
 
 
 
 
 
 
 
 
 
 
 
 
5ca6388
f3913f7
 
 
5ca6388
f3913f7
 
 
5ca6388
 
 
 
 
f3913f7
 
 
 
 
 
e4e6dc4
f3913f7
e4e6dc4
f3913f7
5ca6388
 
f3913f7
 
 
 
 
 
 
e4e6dc4
f3913f7
e4e6dc4
 
f3913f7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e4e6dc4
f3913f7
e4e6dc4
5ca6388
f3913f7
 
 
5ca6388
e4e6dc4
f3913f7
 
e4e6dc4
5ca6388
f3913f7
 
 
 
 
 
5ca6388
 
f3913f7
 
e4e6dc4
f3913f7
 
e4e6dc4
f3913f7
 
e4e6dc4
 
f3913f7
 
 
5ca6388
 
f3913f7
 
2987ac4
 
f3913f7
2987ac4
 
f3913f7
 
 
5ca6388
 
 
 
f3913f7
5ca6388
f3913f7
5ca6388
e4e6dc4
f3913f7
 
5ca6388
 
f3913f7
 
 
 
 
 
 
5ca6388
f3913f7
e4e6dc4
5ca6388
f3913f7
 
 
 
 
5ca6388
f3913f7
5ca6388
f3913f7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e4e6dc4
f3913f7
e4e6dc4
5ca6388
f3913f7
e4e6dc4
f3913f7
5ca6388
e4e6dc4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
"""
Doc-Q&A app (Gradio 5.x + Llama-Index 0.12.x, June 2025)

Key upgrades
------------
β–ͺ Gradio 5.34β€”new event system (`upload`, `clear` etc.)  
β–ͺ Llama-Index 0.12.42β€”`VectorStoreIndex.from_documents` signature unchanged  
β–ͺ MixedbreadAIEmbedding 0.3.0  ➜ supports `batch_size`, `timeout`  
β–ͺ Tenacity for exponential-back-off when MXBAI returns 5xx / rate limits
"""

from __future__ import annotations

import os
from pathlib import Path
from typing import List

import gradio as gr
from mixedbread_ai.core.api_error import ApiError
from tenacity import retry, retry_if_exception, stop_after_attempt, wait_exponential

from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.embeddings.mixedbreadai import MixedbreadAIEmbedding
from llama_index.llms.groq import Groq
from llama_parse import LlamaParse

# ──────────────────────────────────────────────────────────────────
# 1.  Environment variables (fail-fast if missing)
# ──────────────────────────────────────────────────────────────────
# All three services are mandatory; aborting at import time beats a
# confusing mid-request failure later.
LLAMA_CLOUD_API_KEY = os.getenv("LLAMA_CLOUD_API_KEY")
GROQ_API_KEY        = os.getenv("GROQ_API_KEY")
MXBAI_API_KEY       = os.getenv("MXBAI_API_KEY")

if not all([LLAMA_CLOUD_API_KEY, GROQ_API_KEY, MXBAI_API_KEY]):
    raise EnvironmentError(
        "LLAMA_CLOUD_API_KEY, GROQ_API_KEY and MXBAI_API_KEY must be set in the env."
    )

# ──────────────────────────────────────────────────────────────────
# 2.  Models & parsers  (latest defaults - June 2025)
# ──────────────────────────────────────────────────────────────────
LLM_MODEL   = "llama-3.1-70b-versatile"                 # Groq's best for Q&A
EMBED_MODEL = "mixedbread-ai/mxbai-embed-large-v1"      # 1024-dim

# LlamaParse converts every supported upload to markdown for the reader.
parser = LlamaParse(api_key=LLAMA_CLOUD_API_KEY, result_type="markdown")

# Extensions routed through LlamaParse; anything else is rejected in
# load_files() before parsing.
SUPPORTED_EXTS = (
    ".pdf", ".docx", ".doc", ".txt", ".csv", ".xlsx",
    ".pptx", ".html", ".jpg", ".jpeg", ".png", ".webp", ".svg",
)
file_extractor = {ext: parser for ext in SUPPORTED_EXTS}

embed_model = MixedbreadAIEmbedding(
    api_key     = MXBAI_API_KEY,
    model_name  = EMBED_MODEL,
    batch_size  = 8,          # keep requests < 100 KB
    timeout     = 60,         # generous server-side processing window
)

llm = Groq(model=LLM_MODEL, api_key=GROQ_API_KEY)

# A simple global cache (could be swapped for Redis, etc.)
# None until the first successful upload; respond() checks this.
vector_index: VectorStoreIndex | None = None


# ──────────────────────────────────────────────────────────────────
# 3.  Helper wrappers
# ──────────────────────────────────────────────────────────────────
# Status codes worth retrying: rate limiting plus transient server errors.
_RETRYABLE_STATUS = (429, 500, 502, 503, 504)


def _is_transient_mxbai_error(exc: BaseException) -> bool:
    """Return True when *exc* is an MXBAI ApiError that merits a retry."""
    return isinstance(exc, ApiError) and exc.status_code in _RETRYABLE_STATUS


@retry(
    wait=wait_exponential(multiplier=2, min=4, max=32),
    stop=stop_after_attempt(4),
    # Only transient MXBAI failures are retried; any other exception
    # propagates to the caller immediately.  (The previous version had no
    # `retry=` predicate, so tenacity retried *every* exception, and the
    # status-code check in the except block was dead code because both
    # branches re-raised identically.)
    retry=retry_if_exception(_is_transient_mxbai_error),
    retry_error_callback=lambda retry_state: None,  # exhausted retries -> None
)
def _safe_build_index(docs) -> VectorStoreIndex | None:
    """Build a vector index, retrying MXBAI 5xx / 429 transparently.

    Returns the index on success, or ``None`` after the final failed
    attempt so the caller can show a friendly "service busy" message.
    Non-transient errors raise straight through.
    """
    return VectorStoreIndex.from_documents(docs, embed_model=embed_model)


def load_files(file: Path | None) -> str:
    """Parse uploaded file and build vector index (with retries)."""
    global vector_index
    if file is None:
        return "⚠️  No file selected."

    if file.suffix.lower() not in SUPPORTED_EXTS:
        allow = ", ".join(SUPPORTED_EXTS)
        return f"⚠️  Unsupported file type. Allowed: {allow}"

    docs = SimpleDirectoryReader(
        input_files=[str(file)],
        file_extractor=file_extractor,
    ).load_data()

    idx = _safe_build_index(docs)
    if idx is None:
        return "🚧 Embedding service busy. Please retry in ~1 minute."

    vector_index = idx
    return f"βœ… Parsed **{file.name}** β€” you can start chatting!"


def respond(message: str, history: List[List[str]]):
    """Stream the answer into the Chatbot.

    A Gradio ``Chatbot`` output expects the *full* history as a list of
    ``[user, assistant]`` pairs — yielding a bare string (as the previous
    version did) renders nothing.  Each yield therefore re-emits the prior
    history plus the growing answer for the current message.
    """
    history = history or []  # Gradio may pass None on the first turn

    if vector_index is None:
        yield history + [[message, "➡️ Please upload a document first."]]
        return

    query_engine = vector_index.as_query_engine(streaming=True, llm=llm)
    response = query_engine.query(message)

    partial = ""
    for token in response.response_gen:
        partial += token
        yield history + [[message, partial]]


def clear():
    """Drop the cached index and blank every widget wired to the Reset button."""
    global vector_index
    vector_index = None
    # Tuple order mirrors the click() outputs: file_input, status_md, chatbot.
    return (None, "", None)


# ──────────────────────────────────────────────────────────────────
# 4.  Gradio UI  (5.x syntax)
# ──────────────────────────────────────────────────────────────────
# Layout: left column = upload + status + reset; right column = chat.
with gr.Blocks(
    theme=gr.themes.Default(
        primary_hue="green",
        secondary_hue="blue",
        font=[gr.themes.GoogleFont("Poppins")],
    ),
    css="footer {visibility:hidden}",  # hide the default Gradio footer
) as demo:

    gr.Markdown("<h1 style='text-align:center'>DataCamp Doc Q&A πŸ€–πŸ“‘</h1>")

    with gr.Row():
        with gr.Column(scale=1):
            # type="filepath" delivers a str path to the upload callback.
            file_input = gr.File(
                label="Upload document",
                file_count="single",
                type="filepath",
                show_label=True,
            )
            status_md  = gr.Markdown()  # parse/index progress messages
            with gr.Row():
                clear_btn = gr.Button("Reset πŸ”„", variant="secondary")

        with gr.Column(scale=3):
            chatbot = gr.Chatbot(height=340)
            txt_box = gr.Textbox(
                placeholder="Ask something about the uploaded document…",
                container=False,
                scale=7,
            )
            send_btn = gr.Button("Send", variant="primary")

    # events (v5 style)
    # upload -> parse & index; click -> stream answer; reset -> wipe state.
    file_input.upload(
        fn=load_files,
        inputs=file_input,
        outputs=status_md,
    )
    send_btn.click(
        fn=respond,
        inputs=[txt_box, chatbot],
        outputs=chatbot,
    )
    clear_btn.click(
        fn=clear,
        outputs=[file_input, status_md, chatbot],
    )

# optional: disable public OpenAPI schema (old crash guard)
# queue() also enables streaming generators like respond() to yield.
demo.queue(api_open=False)

# ──────────────────────────────────────────────────────────────────
# 5.  Launch
# ──────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # share=True publishes a temporary public URL; 0.0.0.0 binds all
    # interfaces (e.g. inside a container).
    demo.launch(share=True, server_name="0.0.0.0", server_port=7860)