# Hosting page header (Hugging Face Spaces) reported status: Runtime error
import gradio as gr
import pdfplumber
import pytesseract
from PIL import Image
from transformers import pipeline
from sentence_transformers import SentenceTransformer, util
# Load Hugging Face models
# The models are instantiated at module level, so they are created once when
# this module is imported and then shared by every call that uses them.
qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
# NOTE(review): `embedder` is not referenced anywhere in the visible code;
# confirm whether it is still needed before removing it.
embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
# Function to extract text from PDF
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF, in page order.

    Args:
        pdf_file: The PDF to read; forwarded unchanged to ``pdfplumber.open``.

    Returns:
        The extracted text of each page followed by a newline, concatenated
        into one string. A page with no extractable text contributes only
        its newline.
    """
    with pdfplumber.open(pdf_file) as pdf:
        # Depending on the pdfplumber version, extract_text() may return None
        # for a page without a text layer; `or ""` keeps the concatenation
        # from raising TypeError in that case.
        return "".join((page.extract_text() or "") + "\n" for page in pdf.pages)
# Function to extract text from image using OCR
def extract_text_from_image(image_file):
    """Return the text recognised in an image via Tesseract OCR.

    Args:
        image_file: The image to read; forwarded unchanged to ``Image.open``.

    Returns:
        The string produced by ``pytesseract.image_to_string`` for the image.
    """
    # Open the image as a context manager so the underlying file handle is
    # released as soon as OCR finishes instead of lingering until GC.
    with Image.open(image_file) as image:
        return pytesseract.image_to_string(image)
# Function to process document and answer questions
def document_processor(uploaded_file, query):
    """Extract text from an uploaded document and summarise or query it.

    Args:
        uploaded_file: The upload handed over by the UI. Either an object
            exposing a ``name`` attribute holding the file path (and
            optionally ``read()``), or the file path itself as a string.
            ``None`` means nothing was uploaded.
        query: The user's question, or the word ``summarize`` (any case,
            surrounding whitespace ignored) to request a summary.

    Returns:
        The summary text, the answer text, or a short explanatory message
        when the upload, the query, or the extracted text is missing.
    """
    # Guard clauses: without these the model pipelines (or attribute access
    # on None) would raise and the UI would only show a generic error.
    if uploaded_file is None:
        return "Please upload a document first."
    question = (query or "").strip()
    if not question:
        return "Please enter a question (or type 'summarize')."

    # Accept both a file-like object carrying its path in `.name` and a bare
    # path string.
    path = getattr(uploaded_file, "name", uploaded_file)
    # Compare extensions case-insensitively so "REPORT.PDF" is treated as a PDF.
    lowered_path = str(path).lower()

    if lowered_path.endswith(".pdf"):
        text = extract_text_from_pdf(path)
    elif lowered_path.endswith((".png", ".jpg", ".jpeg")):
        text = extract_text_from_image(path)
    elif hasattr(uploaded_file, "read"):
        # Keep the original behaviour for file-like uploads; tolerate both
        # binary and text mode handles.
        raw = uploaded_file.read()
        text = raw.decode("utf-8") if isinstance(raw, bytes) else raw
    else:
        with open(path, encoding="utf-8") as handle:
            text = handle.read()

    if not text or not text.strip():
        return "No readable text was found in the document."

    if question.lower() == "summarize":
        # truncation=True keeps long documents within the model's maximum
        # input length instead of failing on oversized input.
        summary = summarizer(
            text,
            max_length=150,
            min_length=30,
            do_sample=False,
            truncation=True,
        )
        return summary[0]["summary_text"]

    # Find the best-matching answer
    answer = qa_pipeline(question=question, context=text)
    return answer["answer"]
# Gradio UI
# NOTE(review): the original indentation was lost, so the nesting below is
# reconstructed: the upload and query widgets are assumed to sit side by side
# in the Row, with the response box and button beneath it. Confirm the layout.
with gr.Blocks() as app:
    # NOTE(review): the "π" in the title looks like a mis-decoded emoji; it is
    # left unchanged because the intended character cannot be determined here.
    gr.Markdown("# π Smart Document Explorer")
    with gr.Row():
        uploaded_file = gr.File(label="Upload Document (PDF, Image, or Text)")
        query = gr.Textbox(
            label="Ask a question (or type 'summarize')",
            placeholder="What is this document about?",
        )
    output_text = gr.Textbox(label="AI Response")
    submit_btn = gr.Button("Process Document")
    # Event listeners must be registered while the Blocks context is open.
    submit_btn.click(document_processor, inputs=[uploaded_file, query], outputs=output_text)

app.launch()