Spaces:
Running
on
Zero
Running
on
Zero
File size: 3,908 Bytes
f3d315e b3942f2 f3d315e b3942f2 f3d315e b3942f2 f3d315e b3942f2 f3d315e c19c0c9 e3f298a f3d315e beaf3b0 f3d315e beaf3b0 f3d315e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
import gradio as gr
import tempfile
import os
import fitz # PyMuPDF
import uuid
from middleware import Middleware
def generate_uuid(state):
    """Return the session's user UUID, creating and storing one if needed.

    Args:
        state: Mutable per-session dict. The UUID is kept under the
            ``"user_uuid"`` key; a missing key is treated the same as a
            stored ``None``.

    Returns:
        The UUID string stored in ``state["user_uuid"]``.
    """
    # Use .get() so a state dict that was never initialised with the key
    # gets a fresh UUID instead of raising KeyError.
    if state.get("user_uuid") is None:
        state["user_uuid"] = str(uuid.uuid4())
    return state["user_uuid"]
class PDFSearchApp:
    """Callbacks for indexing an uploaded PDF and searching it, keyed by session UUID."""

    def __init__(self):
        # Session UUID -> True once that session has successfully indexed a PDF.
        self.indexed_docs = {}
        # Path of the most recently uploaded PDF.
        self.current_pdf = None

    def upload_and_convert(self, state, file, max_pages):
        """Index an uploaded PDF for the current session.

        Args:
            state: Per-session dict passed to ``generate_uuid``.
            file: Uploaded file object exposing ``.name``, or ``None`` when
                nothing was uploaded.
            max_pages: Forwarded to ``Middleware.index`` as ``max_pages``.

        Returns:
            A status message string (success, "No file uploaded", or the
            error text if indexing raised).
        """
        user_id = generate_uuid(state)
        if file is None:
            return "No file uploaded"

        print(f"Uploading file: {file.name}, id: {user_id}")
        try:
            self.current_pdf = file.name
            middleware = Middleware(user_id, create_collection=True)
            pages = middleware.index(
                pdf_path=file.name, id=user_id, max_pages=max_pages
            )
            self.indexed_docs[user_id] = True
            return f"Uploaded and extracted {len(pages)} pages"
        except Exception as e:
            # Callback boundary: report the failure as a status message
            # instead of letting the exception propagate.
            return f"Error processing PDF: {str(e)}"

    def search_documents(self, state, query, num_results=5):
        """Search the session's indexed PDF and return the best matching page.

        Args:
            state: Per-session dict passed to ``generate_uuid``.
            query: Search text; an empty value yields a prompt message.
            num_results: Accepted for interface compatibility; not used by
                the current implementation.

        Returns:
            A 2-tuple. On success: ``(image_path, "Retrieved page number: N")``.
            On failure: ``(message, "--")``.
        """
        # NOTE(review): on failure the message is returned in the first slot,
        # which the UI in this file wires to an image output — confirm the
        # message actually renders, or consider returning (None, message).
        print(f"Searching for query: {query}")
        user_id = generate_uuid(state)

        # .get(): a session that never uploaded has no entry; plain indexing
        # raised KeyError here before the message below could be returned.
        if not self.indexed_docs.get(user_id):
            print("Please index documents first")
            return "Please index documents first", "--"
        if not query:
            print("Please enter a search query")
            return "Please enter a search query", "--"

        try:
            middleware = Middleware(user_id, create_collection=False)
            search_results = middleware.search([query])[0]
            # presumably element [1] of the top hit is a zero-based page
            # index — confirm against Middleware.search.
            page_num = search_results[0][1] + 1
            print(f"Retrieved page number: {page_num}")
            img_path = f"pages/{user_id}/page_{page_num}.png"
            print(f"Retrieved image path: {img_path}")
            return img_path, f"Retrieved page number: {page_num}"
        except Exception as e:
            return f"Error during search: {str(e)}", "--"
def create_ui():
    """Build the Gradio Blocks UI (upload tab + search tab) and return it."""
    search_app = PDFSearchApp()

    with gr.Blocks() as blocks:
        # Per-session state; the UUID is filled in by the first callback.
        session_state = gr.State(value={"user_uuid": None})

        gr.Markdown("# Colpali Milvus Search Demo")
        gr.Markdown("This demo showcases how to use [Colpali](https://github.com/illuin-tech/colpali) embeddings with [Milvus](https://milvus.io/) for pdf search.")

        with gr.Tab("Upload PDFs"), gr.Column():
            pdf_upload = gr.File(label="Upload PDFs")
            # NOTE(review): the default of 20 is not on the 1, 11, 21, ...
            # grid implied by minimum=1 / step=10 — confirm this is intended.
            page_limit = gr.Slider(
                label="Max Pages", minimum=1, maximum=50, value=20, step=10
            )
            upload_status = gr.Textbox(label="Status", interactive=False)

        with gr.Tab("Search"), gr.Column():
            query_box = gr.Textbox(label="Query")
            result_count = gr.Slider(
                label="Number of results", minimum=1, maximum=10, value=5, step=1
            )
            run_search = gr.Button("Search")
            answer_box = gr.Textbox(label="Answer", interactive=False)
            page_image = gr.Image(label="Pages used for RAG")

        # Wire component events to the app callbacks.
        pdf_upload.change(
            fn=search_app.upload_and_convert,
            inputs=[session_state, pdf_upload, page_limit],
            outputs=[upload_status],
        )
        run_search.click(
            fn=search_app.search_documents,
            inputs=[session_state, query_box, result_count],
            outputs=[page_image, answer_box],
        )

    return blocks
# Script entry point: build the UI and start the Gradio server.
if __name__ == "__main__":
    demo = create_ui()
    demo.launch()
|