Update app.py
Browse files
app.py
CHANGED
@@ -278,77 +278,97 @@ def upload_file(file_obj):
|
|
278 |
return list_file_path
|
279 |
|
280 |
|
|
|
|
|
281 |
def demo():
|
282 |
with gr.Blocks(theme="base") as demo:
|
283 |
vector_db = gr.State()
|
284 |
qa_chain = gr.State()
|
285 |
collection_name = gr.State()
|
286 |
|
287 |
-
# Update the introduction with BookMyDarshan branding
|
288 |
gr.Markdown(
|
289 |
-
|
290 |
-
|
291 |
-
|
292 |
gr.Markdown(
|
293 |
-
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
|
298 |
-
with gr.Tab("Step 1 - Upload Your Spiritual Texts"):
|
299 |
-
with gr.Row():
|
300 |
-
document = gr.Files(height=100, file_count="multiple", file_types=["pdf"], interactive=True, label="Upload your spiritual PDFs (e.g., scriptures, pilgrim guides)")
|
301 |
|
302 |
-
with gr.Tab("Step
|
303 |
-
|
304 |
-
db_btn = gr.Radio(["ChromaDB"], label="Vector database type", value="ChromaDB", type="index", info="Choose your vector database")
|
305 |
-
with gr.Accordion("Advanced options - Document text splitter", open=False):
|
306 |
-
with gr.Row():
|
307 |
-
slider_chunk_size = gr.Slider(minimum=100, maximum=1000, value=600, step=20, label="Chunk size", info="Size of text chunks to process", interactive=True)
|
308 |
-
with gr.Row():
|
309 |
-
slider_chunk_overlap = gr.Slider(minimum=10, maximum=200, value=40, step=10, label="Chunk overlap", info="Overlap between text chunks", interactive=True)
|
310 |
-
with gr.Row():
|
311 |
-
db_progress = gr.Textbox(label="Database initialization status", value="None")
|
312 |
-
with gr.Row():
|
313 |
-
db_btn = gr.Button("Generate Spiritual Knowledge Database")
|
314 |
|
315 |
-
with gr.Tab("Step
|
316 |
-
|
317 |
-
|
318 |
-
|
319 |
-
|
320 |
-
|
321 |
-
|
322 |
-
|
323 |
-
|
324 |
-
|
325 |
-
|
326 |
-
|
327 |
-
|
328 |
-
|
329 |
-
|
|
|
330 |
|
331 |
-
with gr.Tab("Step 4
|
332 |
-
chatbot = gr.Chatbot(height=300)
|
333 |
-
with gr.Accordion("Advanced
|
334 |
with gr.Row():
|
335 |
-
doc_source1 = gr.Textbox(label="Reference 1", lines=2
|
336 |
-
source1_page = gr.Number(label="Page",
|
337 |
with gr.Row():
|
338 |
-
doc_source2 = gr.Textbox(label="Reference 2", lines=2
|
339 |
-
source2_page = gr.Number(label="Page",
|
340 |
with gr.Row():
|
341 |
-
doc_source3 = gr.Textbox(label="Reference 3", lines=2
|
342 |
-
source3_page = gr.Number(label="Page",
|
|
|
343 |
with gr.Row():
|
344 |
-
|
345 |
-
|
346 |
-
|
347 |
-
|
348 |
-
|
349 |
-
|
350 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
351 |
demo.queue().launch(debug=True)
|
352 |
|
|
|
353 |
if __name__ == "__main__":
|
354 |
demo()
|
|
|
|
278 |
return list_file_path
|
279 |
|
280 |
|
281 |
+
import gradio as gr
|
282 |
+
|
283 |
def demo():
    """Build the PDF-chatbot Gradio interface and launch it.

    The UI is laid out as four tabs: upload PDFs, build the vector
    database, initialise the QA chain, and chat. The event handlers
    ``initialize_database``, ``initialize_LLM`` and ``conversation`` are
    defined elsewhere in this module; this function only creates the
    components and wires them to those handlers, then starts the app with
    queuing enabled and ``debug=True``.
    """
    with gr.Blocks(theme="base") as app:
        # State holders passed between the event handlers wired below.
        vector_db = gr.State()
        qa_chain = gr.State()
        collection_name = gr.State()

        # Introductory banner and usage notes.
        gr.Markdown(
            "<center><h2>PDF-based Chatbot</h2></center>\n"
            "<h3>Ask any questions about your PDF documents</h3>"
        )
        gr.Markdown(
            "<b>Note:</b> This AI assistant uses LangChain and open-source LLMs for retrieval-augmented generation (RAG) from your PDF documents.\n"
            "The UI shows multiple steps to help you understand the RAG workflow. This chatbot considers past questions when generating answers (via conversational memory)\n"
            "and includes document references for clarity.<br>\n"
            "<br><b>Warning:</b> This demo uses free CPU Basic hardware from Hugging Face, so some steps may take time."
        )

        with gr.Tab("Step 1: Upload PDF"):
            document = gr.Files(
                label="Upload your PDF documents",
                file_count="multiple",
                file_types=["pdf"],
                interactive=True,
            )

        with gr.Tab("Step 2: Process Document"):
            # NOTE(review): this radio is not passed to any event handler in
            # this function, so the selection currently has no effect here.
            db_btn = gr.Radio(
                ["ChromaDB"],
                label="Select Vector Database",
                value="ChromaDB",
                info="Choose your vector database",
            )
            with gr.Accordion("Advanced Options: Text Splitter", open=False):
                slider_chunk_size = gr.Slider(
                    minimum=100,
                    maximum=1000,
                    value=600,
                    step=20,
                    label="Chunk Size",
                    info="Adjust chunk size for text splitting",
                )
                slider_chunk_overlap = gr.Slider(
                    minimum=10,
                    maximum=200,
                    value=40,
                    step=10,
                    label="Chunk Overlap",
                    info="Adjust overlap between chunks",
                )
            db_progress = gr.Textbox(
                label="Vector Database Initialization Status",
                value="None",
                interactive=False,
            )
            generate_db_btn = gr.Button("Generate Vector Database")

        with gr.Tab("Step 3: Initialize QA Chain"):
            # NOTE(review): without type="index" the selected label string is
            # what gets passed to initialize_LLM -- confirm that handler
            # expects a label rather than a list index.
            llm_btn = gr.Radio(
                ["LLM Model 1", "LLM Model 2"],
                label="Select LLM Model",
                value="LLM Model 1",
                info="Choose the LLM model",
            )
            with gr.Accordion("Advanced Options: LLM Model", open=False):
                slider_temperature = gr.Slider(
                    minimum=0.01,
                    maximum=1.0,
                    value=0.7,
                    step=0.1,
                    label="Temperature",
                    info="Adjust the model's creativity level",
                )
                slider_maxtokens = gr.Slider(
                    minimum=224,
                    maximum=4096,
                    value=1024,
                    step=32,
                    label="Max Tokens",
                    info="Set the maximum number of tokens",
                )
                slider_topk = gr.Slider(
                    minimum=1,
                    maximum=10,
                    value=3,
                    step=1,
                    label="Top-K Samples",
                    info="Select the number of top-k samples",
                )
            llm_progress = gr.Textbox(
                label="QA Chain Initialization Status",
                value="None",
                interactive=False,
            )
            qachain_btn = gr.Button("Initialize QA Chain")

        with gr.Tab("Step 4: Chatbot"):
            chatbot = gr.Chatbot(label="Chat with your PDF", height=300)
            # Three (reference text, page number) pairs, one pair per row,
            # collected in display order: text 1, page 1, text 2, page 2, ...
            reference_fields = []
            with gr.Accordion("Advanced: Document References", open=False):
                for ref_number in (1, 2, 3):
                    with gr.Row():
                        reference_fields.append(
                            gr.Textbox(label=f"Reference {ref_number}", lines=2)
                        )
                        reference_fields.append(
                            gr.Number(label="Page", interactive=True)
                        )
            msg = gr.Textbox(
                placeholder="Type your question here...",
                label="Ask a Question",
                container=True,
            )
            with gr.Row():
                submit_btn = gr.Button("Submit")
                clear_btn = gr.Button("Clear Conversation")

        # Preprocessing events
        generate_db_btn.click(
            initialize_database,
            inputs=[document, slider_chunk_size, slider_chunk_overlap],
            outputs=[vector_db, collection_name, db_progress],
        )
        chain_ready = qachain_btn.click(
            initialize_LLM,
            inputs=[llm_btn, slider_temperature, slider_maxtokens, slider_topk, vector_db],
            outputs=[qa_chain, llm_progress],
        )
        # Once the chain is (re)initialised, wipe the chat history and the
        # three reference/page pairs.
        chain_ready.then(
            lambda: [None] + ["", 0] * 3,
            inputs=None,
            outputs=[chatbot, *reference_fields],
            queue=False,
        )

        # Chatbot events: pressing Enter in the textbox and clicking Submit
        # are wired identically, in that order.
        for ask in (msg.submit, submit_btn.click):
            ask(
                conversation,
                inputs=[qa_chain, msg, chatbot],
                outputs=[qa_chain, msg, chatbot, *reference_fields],
                queue=False,
            )
        clear_btn.click(
            lambda: [None] + ["", 0] * 3,
            inputs=None,
            outputs=[chatbot, *reference_fields],
            queue=False,
        )
    app.queue().launch(debug=True)


if __name__ == "__main__":
    demo()
|
374 |
+
|