Spaces:

Manishkumaryadav
/

smart-document-explorer

Runtime error

App Files Files Community

Manishkumaryadav committed on Feb 22

Commit

ab28335

verified ·

1 Parent(s): 9c5ba14

Create app.py

Browse files

Files changed (1) hide show

app.py +57 -0

app.py ADDED Viewed

	@@ -0,0 +1,57 @@

+import gradio as gr
+import pdfplumber
+import pytesseract
+from PIL import Image
+from transformers import pipeline
+from sentence_transformers import SentenceTransformer, util
+# Load Hugging Face models
+qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")
+summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
+embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
+# Function to extract text from PDF
+def extract_text_from_pdf(pdf_file):
+    text = ""
+    with pdfplumber.open(pdf_file) as pdf:
+        for page in pdf.pages:
+            text += page.extract_text() + "\n"
+    return text
+# Function to extract text from image using OCR
+def extract_text_from_image(image_file):
+    image = Image.open(image_file)
+    return pytesseract.image_to_string(image)
+# Function to process document and answer questions
+def document_processor(uploaded_file, query):
+    text = ""
+    if uploaded_file.name.endswith(".pdf"):
+        text = extract_text_from_pdf(uploaded_file.name)
+    elif uploaded_file.name.endswith((".png", ".jpg", ".jpeg")):
+        text = extract_text_from_image(uploaded_file.name)
+    else:
+        text = uploaded_file.read().decode("utf-8")
+    if query.lower() == "summarize":
+        summary = summarizer(text, max_length=150, min_length=30, do_sample=False)
+        return summary[0]["summary_text"]
+    # Find the best-matching answer
+    answer = qa_pipeline(question=query, context=text)
+    return answer["answer"]
+# Gradio UI
+with gr.Blocks() as app:
+    gr.Markdown("# 📄 Smart Document Explorer")
+    with gr.Row():
+        uploaded_file = gr.File(label="Upload Document (PDF, Image, or Text)")
+        query = gr.Textbox(label="Ask a question (or type 'summarize')", placeholder="What is this document about?")
+    output_text = gr.Textbox(label="AI Response")
+    submit_btn = gr.Button("Process Document")
+    submit_btn.click(document_processor, inputs=[uploaded_file, query], outputs=output_text)
+app.launch()