Spaces:
Running
Running
File size: 3,406 Bytes
0d84ecf 47942ca c330600 0d84ecf bca0a86 c330600 0d84ecf 4a81c80 0d84ecf 974f8bb 0d84ecf 461e409 0d84ecf c330600 0d84ecf c330600 0d84ecf c330600 5b4fc38 0d84ecf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 |
from fastapi import FastAPI, UploadFile, File, Form
from fastapi.responses import RedirectResponse
import gradio as gr
from transformers import pipeline
import tempfile
import os
from PIL import Image
import fitz # PyMuPDF
import docx
import openpyxl
from pptx import Presentation
import easyocr
# Initialize models
# All three models are created once, at import time, and shared by every request.
# Text2text-generation pipeline that process_document uses to produce summaries.
summarizer = pipeline("text2text-generation", model="FeruzaBoynazarovaas/my_awesome_billsum_model")
# Image-to-text pipeline that process_image uses to caption uploaded images.
captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
reader = easyocr.Reader(['en']) # OCR reader configured for English only; used by process_image
# FastAPI application object; the Gradio UI is mounted onto it further down the file.
app = FastAPI()
def extract_text_from_file(file_path: str, file_type: str) -> str:
    """Extract plain text from a PDF, DOCX, PPTX or XLSX document.

    Args:
        file_path: Path of the document on disk.
        file_type: File extension identifying the format. Matched
            case-insensitively, with or without a leading dot
            (``"pdf"``, ``".PDF"`` ...).

    Returns:
        The extracted text with one item (page, paragraph, shape or cell) per
        line. Failures are reported through the return value instead of being
        raised: ``"Unsupported file format"`` for an unknown ``file_type`` and
        ``"Error reading file: <details>"`` when the parser fails.
    """
    # Normalise so ".PDF" and "pdf" select the same branch; None falls through
    # to the "unsupported" answer just as it did before.
    kind = (file_type or "").strip().lstrip(".").lower()
    try:
        if kind == "pdf":
            with fitz.open(file_path) as doc:
                return "\n".join(page.get_text() for page in doc)
        if kind == "docx":
            document = docx.Document(file_path)
            return "\n".join(p.text for p in document.paragraphs)
        if kind == "pptx":
            prs = Presentation(file_path)
            # Not every shape carries text (pictures, connectors, ...).
            return "\n".join(
                shape.text
                for slide in prs.slides
                for shape in slide.shapes
                if hasattr(shape, "text")
            )
        if kind == "xlsx":
            wb = openpyxl.load_workbook(file_path)
            try:
                # Skip empty cells: str(None) would otherwise put the literal
                # word "None" into the text for every blank cell.
                return "\n".join(
                    str(cell.value)
                    for sheet in wb
                    for row in sheet
                    for cell in row
                    if cell.value is not None
                )
            finally:
                wb.close()
        return "Unsupported file format"
    except Exception as e:
        # Deliberate catch-all: callers expect a string, never an exception.
        return f"Error reading file: {e}"
def process_document(file):
    """Handle document upload and summarization.

    Args:
        file: The uploaded document. Accepted forms are a filesystem path
            (``str`` / ``os.PathLike``), an object exposing ``.name`` (and
            optionally ``.read()``), or ``None`` when nothing was uploaded.

    Returns:
        The generated summary, or a short human-readable message when there is
        nothing to summarize (no upload, unsupported format, read error, or a
        document without text).
    """
    if file is None:
        return "No file uploaded"

    is_path = isinstance(file, (str, os.PathLike))
    source_name = os.fspath(file) if is_path else file.name
    file_ext = os.path.splitext(source_name)[1][1:].lower()

    tmp_path = None
    try:
        if os.path.isfile(source_name):
            # The upload already lives on disk: read it in place rather than
            # copying through file.read().
            # NOTE(review): Gradio appears to hand uploads over as a path
            # (directly, or via ``.name``) - confirm for the pinned version.
            doc_path = source_name
        elif hasattr(file, "read"):
            # Save temp file so the path-based extractors can open it.
            with tempfile.NamedTemporaryFile(delete=False, suffix=f".{file_ext}") as tmp:
                tmp_path = tmp.name
                tmp.write(file.read())
            doc_path = tmp_path
        else:
            doc_path = source_name

        # Extract
        text = extract_text_from_file(doc_path, file_ext)
    finally:
        # Cleanup - runs even when extraction raises, so no temp file leaks.
        if tmp_path is not None:
            os.unlink(tmp_path)

    # extract_text_from_file reports problems as plain strings; show them to
    # the user instead of asking the model to "summarize" an error message.
    if text == "Unsupported file format" or text.startswith("Error reading file:"):
        return text
    if not text.strip():
        return "No text found in document"

    # truncation=True keeps long documents within the model's input limit.
    result = summarizer(
        text,
        max_length=150,
        min_length=30,
        do_sample=False,
        truncation=True,
    )
    return result[0]['generated_text']
def process_image(image):
    """Handle image captioning and OCR.

    Args:
        image: Anything ``PIL.Image.open`` accepts (a file path or a binary
            file object), or ``None`` when no image was supplied.

    Returns:
        A dict with two string entries: ``"caption"`` (the generated caption)
        and ``"ocr_text"`` (the recognised text joined by spaces, or
        ``"No readable text found"`` when OCR finds nothing).
    """
    if image is None:
        return {
            "caption": "No image provided",
            "ocr_text": "No readable text found"
        }

    # Local import: numpy is not imported at module level in this file.
    import numpy as np

    # convert() returns an in-memory copy, so the file handle can be closed
    # right away; RGB also normalises palette / alpha images.
    with Image.open(image) as opened:
        img = opened.convert("RGB")

    # Get caption
    caption = captioner(img)[0]['generated_text']

    # Get OCR text. easyocr documents path, bytes and numpy array inputs, so
    # hand it an array rather than the PIL image object.
    ocr_result = reader.readtext(np.array(img))
    # Each result is (bounding box, text, confidence); keep only the text.
    ocr_text = " ".join(res[1] for res in ocr_result)

    return {
        "caption": caption,
        "ocr_text": ocr_text if ocr_text else "No readable text found"
    }
# Gradio Interface
def _image_results_for_ui(image):
    """Unpack process_image's dict into (caption, ocr_text) for the two output boxes."""
    result = process_image(image)
    return result["caption"], result["ocr_text"]


with gr.Blocks() as demo:
    gr.Markdown("# π Document & Image Analysis Web Service")

    with gr.Tab("Document Summarization"):
        doc_input = gr.File(label="Upload Document (PDF, DOCX, PPTX, XLSX)")
        doc_output = gr.Textbox(label="Summary")
        doc_button = gr.Button("Summarize")

    with gr.Tab("Image Analysis"):
        img_input = gr.Image(type="filepath", label="Upload Image")
        with gr.Accordion("Results", open=True):
            caption_output = gr.Textbox(label="Image Caption")
            ocr_output = gr.Textbox(label="Extracted Text")
        img_button = gr.Button("Analyze")

    # Event wiring. With several output components Gradio expects one return
    # value per component, in order; process_image returns a dict keyed by
    # plain strings, so it is routed through the adapter above.
    doc_button.click(process_document, inputs=doc_input, outputs=doc_output)
    img_button.click(_image_results_for_ui, inputs=img_input, outputs=[caption_output, ocr_output])

# Mount Gradio app at the root of the FastAPI application.
app = gr.mount_gradio_app(app, demo, path="/")
@app.get("/")
def redirect_to_gradio():
    """Answer ``GET /`` with a redirect to ``/``.

    NOTE(review): the Gradio app is mounted at "/" before this route is
    registered, so this handler is presumably never reached. If it were, it
    would redirect "/" to itself. Confirm whether the UI was meant to live
    under a sub-path; routing is left unchanged here.
    """
    return RedirectResponse(url="/")