Update app.py
app.py
CHANGED
@@ -1,99 +1,97 @@
-from fastapi import FastAPI, UploadFile, Form
-from fastapi.responses import RedirectResponse
 import os
-import shutil
 from PIL import Image
-
-
 import easyocr
-import torch
-import tempfile
-import gradio as gr
-import numpy as np
-
-app = FastAPI()
 
-
-
-# Load VQA Model
-vqa_processor = ViltProcessor.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
-vqa_model = ViltForQuestionAnswering.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
-
-# Load image captioning model
 captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
 
 
-
-
-def classify_question(question: str):
-    question_lower = question.lower()
-    if any(word in question_lower for word in ["text", "say", "written", "read"]):
-        return "ocr"
-    elif any(word in question_lower for word in ["caption", "describe", "what is in the image"]):
-        return "caption"
-    else:
-        return "vqa"
-
-def answer_question_from_image(image, question):
-    if image is None or not question.strip():
-        return "Please upload an image and ask a question.", None
-
-    mode = classify_question(question)
-
-    if mode == "ocr":
-        try:
-            result = reader.readtext(np.array(image))
-            text = " ".join([entry[1] for entry in result])
-            answer = text.strip() or "No readable text found."
-        except Exception as e:
-            answer = f"OCR Error: {e}"
-
-    elif mode == "caption":
-        try:
-            answer = captioner(image)[0]['generated_text']
-        except Exception as e:
-            answer = f"Captioning error: {e}"
-
-    else:
-        try:
-            inputs = vqa_processor(image, question, return_tensors="pt")
-            with torch.no_grad():
-                outputs = vqa_model(**inputs)
-            predicted_id = outputs.logits.argmax(-1).item()
-            answer = vqa_model.config.id2label[predicted_id]
-        except Exception as e:
-            answer = f"VQA error: {e}"
-
     try:
-
-
-
-
     except Exception as e:
-        return f"
-
-
-    )
-
-
 
 @app.get("/")
-def
-    return RedirectResponse(url="/")
+from fastapi import FastAPI, UploadFile, File, Form
+from fastapi.responses import RedirectResponse
+import gradio as gr
+from transformers import pipeline
+import tempfile
 import os
 from PIL import Image
+import fitz  # PyMuPDF
+import docx
+import openpyxl
+from pptx import Presentation
 import easyocr
 
+# Initialize models
+summarizer = pipeline("text2text-generation", model="FeruzaBoynazarovaas/my_awesome_billsum_model")
 captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
+reader = easyocr.Reader(['en'])  # For OCR
 
+app = FastAPI()
 
+def extract_text_from_file(file_path: str, file_type: str):
+    """Extract text from different document formats"""
     try:
+        if file_type == "pdf":
+            with fitz.open(file_path) as doc:
+                return "\n".join(page.get_text() for page in doc)
+        elif file_type == "docx":
+            doc = docx.Document(file_path)
+            return "\n".join(p.text for p in doc.paragraphs)
+        elif file_type == "pptx":
+            prs = Presentation(file_path)
+            return "\n".join(shape.text for slide in prs.slides for shape in slide.shapes if hasattr(shape, "text"))
+        elif file_type == "xlsx":
+            wb = openpyxl.load_workbook(file_path)
+            return "\n".join(str(cell.value) for sheet in wb for row in sheet for cell in row)
+        else:
+            return "Unsupported file format"
     except Exception as e:
+        return f"Error reading file: {str(e)}"
+
+def process_document(file):
+    """Handle document upload and summarization"""
+    # Copy the upload to a temp file; Gradio hands over a file object
+    # whose .name is a path on disk, so read from that path rather than
+    # calling file.read() on the (possibly closed) wrapper
+    file_ext = os.path.splitext(file.name)[1][1:].lower()
+    with tempfile.NamedTemporaryFile(delete=False, suffix=f".{file_ext}") as tmp:
+        with open(file.name, "rb") as src:
+            tmp.write(src.read())
+        tmp_path = tmp.name
+
+    # Extract and summarize
+    text = extract_text_from_file(tmp_path, file_ext)
+    summary = summarizer(text, max_length=150, min_length=30, do_sample=False)[0]['generated_text']
+
+    # Cleanup
+    os.unlink(tmp_path)
+    return summary
+
+def process_image(image):
+    """Handle image captioning and OCR"""
+    img = Image.open(image)
+
+    # Get caption
+    caption = captioner(img)[0]['generated_text']
+
+    # Get OCR text; easyocr expects a file path or array rather than a
+    # PIL image, and the input component below passes a filepath
+    ocr_result = reader.readtext(image)
+    ocr_text = " ".join([res[1] for res in ocr_result])
+
+    # Return a tuple so the values map onto the two output boxes below
+    return caption, ocr_text if ocr_text else "No readable text found"
+
+# Gradio Interface
+with gr.Blocks() as demo:
+    gr.Markdown("# 📄 Document & Image Analysis Web Service")
+
+    with gr.Tab("Document Summarization"):
+        doc_input = gr.File(label="Upload Document (PDF, DOCX, PPTX, XLSX)")
+        doc_output = gr.Textbox(label="Summary")
+        doc_button = gr.Button("Summarize")
+
+    with gr.Tab("Image Analysis"):
+        img_input = gr.Image(type="filepath", label="Upload Image")
+        with gr.Accordion("Results", open=True):
+            caption_output = gr.Textbox(label="Image Caption")
+            ocr_output = gr.Textbox(label="Extracted Text")
+        img_button = gr.Button("Analyze")
+
+    doc_button.click(process_document, inputs=doc_input, outputs=doc_output)
+    img_button.click(process_image, inputs=img_input, outputs=[caption_output, ocr_output])
+
+# Mount the Gradio app under /gradio; mounting it at "/" would shadow the
+# root route below and make the redirect point at itself
+app = gr.mount_gradio_app(app, demo, path="/gradio")
 
 @app.get("/")
+def redirect_to_gradio():
+    return RedirectResponse(url="/gradio")
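A quick way to sanity-check this revision outside the Space is to call the two handlers directly, bypassing Gradio and FastAPI. The sketch below is not part of the commit: the sample file names are hypothetical, and it assumes the dependencies behind the imports are installed (fitz ships as PyMuPDF, docx as python-docx, pptx as python-pptx; torch backs the transformers and easyocr models).

# Hypothetical smoke test for app.py; file names are placeholders.
# Importing app downloads the three models on first run.
from app import extract_text_from_file, process_image

print(extract_text_from_file("sample.pdf", "pdf"))   # raw text, or an error string
caption, ocr_text = process_image("sample.png")      # process_image returns a tuple
print("Caption:", caption)
print("OCR:", ocr_text)

Serving the app itself would typically be `uvicorn app:app --host 0.0.0.0 --port 7860`, with the UI at /gradio; how the Space's container actually launches app.py is not shown in this diff.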