# Spaces: Running  (page-header residue from the hosting site, kept as a comment)
import io
import os
import tempfile

import docx
import easyocr
import fitz  # PyMuPDF
import gradio as gr
from fastapi import FastAPI, UploadFile, File
from fastapi.responses import RedirectResponse
from PIL import Image
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
app = FastAPI()

# Model choices.
# NOTE(review): earlier comments quoted sizes of 500MB / 300MB / 250MB for the
# models below; those figures look understated versus the published
# checkpoints -- verify before relying on them for capacity planning.
SUMMARIZATION_MODEL = "facebook/bart-large-cnn"
IMAGE_CAPTIONING_MODEL = "Salesforce/blip-image-captioning-base"

# Initialize models once at import time so every request reuses them.
try:
    summarizer = pipeline(
        "summarization",
        model=SUMMARIZATION_MODEL,
        device="cpu",
    )
except Exception as e:
    # Best-effort fallback: report the failure and load a distilled model
    # instead. The fallback is pinned to the CPU as well, so both code paths
    # run on the same device.
    print(f"Error loading summarizer: {e}")
    summarizer = pipeline(
        "summarization",
        model="sshleifer/distilbart-cnn-12-6",
        device="cpu",
    )

captioner = pipeline(
    "image-to-text",
    model=IMAGE_CAPTIONING_MODEL,
    device="cpu",
)

# OCR reader configured for English text only.
reader = easyocr.Reader(['en'])
def extract_text_from_file(file_path: str, file_type: str):
    """Return the plain text of a PDF or DOCX document.

    Args:
        file_path: Location of the document on disk.
        file_type: Lower-case extension without the dot ("pdf" or "docx").

    Returns:
        The extracted text with pages (PDF) or paragraphs (DOCX) joined by
        newlines. For any other ``file_type`` an "Unsupported file format"
        message is returned, and any exception raised while reading is
        reported as an "Error reading file: ..." string instead of being
        propagated.
    """
    try:
        if file_type == "docx":
            paragraphs = docx.Document(file_path).paragraphs
            return "\n".join(paragraph.text for paragraph in paragraphs)

        if file_type == "pdf":
            # Collect the page texts while the document is still open.
            with fitz.open(file_path) as pdf:
                page_texts = [page.get_text() for page in pdf]
            return "\n".join(page_texts)

        return "Unsupported file format (only PDF/DOCX supported in lightweight version)"
    except Exception as exc:
        return f"Error reading file: {exc!s}"
def process_document(file):
    """Summarize an uploaded PDF or DOCX document.

    Args:
        file: The upload to summarize. May be a filesystem path (``str`` or
            ``os.PathLike``) or a file-like object exposing ``.name`` and,
            when that name is not a file on disk, ``.read()``.

    Returns:
        The generated summary, or a human-readable message when nothing was
        uploaded, the format is unsupported, the document could not be read,
        it contains no text, or processing failed. Exceptions are never
        propagated to the caller.
    """
    if file is None:
        return "Processing error: no file was uploaded"

    tmp_path = None
    try:
        if isinstance(file, (str, os.PathLike)):
            source_path = os.fspath(file)
            needs_copy = False
        else:
            source_path = file.name
            # Only spool to a temporary file when the upload is not already
            # backed by a real file we can read directly.
            needs_copy = not os.path.isfile(source_path)

        file_ext = os.path.splitext(source_path)[1][1:].lower()
        if file_ext not in ("pdf", "docx"):
            return "Lightweight version only supports PDF and DOCX"

        if needs_copy:
            with tempfile.NamedTemporaryFile(delete=False, suffix=f".{file_ext}") as tmp:
                # Record the path first so the finally-clause can remove the
                # file even if the write below fails.
                tmp_path = tmp.name
                tmp.write(file.read())
            source_path = tmp_path

        text = extract_text_from_file(source_path, file_ext)

        # The extractor reports failures as text; surface them instead of
        # feeding the error message to the summarizer.
        if text.startswith("Error reading file:"):
            return text
        if not text.strip():
            return "Processing error: no extractable text found in document"

        # truncation=True keeps documents longer than the model's input
        # limit from raising inside the pipeline.
        result = summarizer(
            text,
            max_length=130,
            min_length=30,
            do_sample=False,
            truncation=True,
        )
        return result[0]['summary_text']
    except Exception as e:
        return f"Processing error: {str(e)}"
    finally:
        # Always remove the temporary copy, including on failure paths.
        if tmp_path is not None and os.path.exists(tmp_path):
            os.unlink(tmp_path)
def process_image(image):
    """Caption an image and extract any readable text from it.

    Args:
        image: A filesystem path or file-like object accepted by
            ``PIL.Image.open``.

    Returns:
        ``{"caption": ..., "ocr_text": ...}`` on success, where ``ocr_text``
        is "No readable text found" when OCR detects nothing, or
        ``{"error": message}`` when no image is given or processing fails.
    """
    if image is None:
        return {"error": "No image provided"}

    try:
        # Load the pixels into a detached RGB copy so the underlying file
        # handle is closed as soon as the image has been read.
        with Image.open(image) as opened:
            img = opened.convert("RGB")

        # Get caption
        caption = captioner(img)[0]['generated_text']

        # Get OCR text. The OCR reader is given a file path or encoded image
        # bytes rather than the PIL object itself.
        if isinstance(image, str):
            ocr_input = image
        else:
            buffer = io.BytesIO()
            img.save(buffer, format="PNG")
            ocr_input = buffer.getvalue()

        ocr_result = reader.readtext(ocr_input)
        # Element 1 of each result is used as the recognized text.
        ocr_text = " ".join(res[1] for res in ocr_result)

        return {
            "caption": caption,
            "ocr_text": ocr_text if ocr_text else "No readable text found",
        }
    except Exception as e:
        return {"error": str(e)}
# Gradio Interface
def _image_results_for_ui(image):
    """Adapt ``process_image``'s dict result to the two image output boxes.

    The click handler below is wired to two output components, so it must
    receive one value per component. Returns ``(caption, ocr_text)``; when
    ``process_image`` reports an error, the message is shown in the caption
    box and the OCR box is left empty.
    """
    result = process_image(image)
    if not isinstance(result, dict):
        # Already in positional form; pass it through untouched.
        return result
    if "error" in result:
        return f"Error: {result['error']}", ""
    return result.get("caption", ""), result.get("ocr_text", "")


with gr.Blocks(title="Lightweight Document & Image Analysis") as demo:
    # The heading previously began with a mis-encoded character; it has been
    # dropped because the intended glyph cannot be recovered.
    gr.Markdown("## Lightweight Document & Image Analysis")

    with gr.Tab("Document Summarization"):
        # NOTE(review): the 10MB limit is only stated here; nothing in this
        # block enforces it.
        gr.Markdown("Supports PDF and DOCX files (max 10MB)")
        doc_input = gr.File(label="Upload Document", file_types=[".pdf", ".docx"])
        doc_output = gr.Textbox(label="Summary")
        doc_button = gr.Button("Summarize")

    with gr.Tab("Image Analysis"):
        gr.Markdown("Get captions and extracted text from images")
        img_input = gr.Image(type="filepath", label="Upload Image")
        with gr.Accordion("Results", open=False):
            caption_output = gr.Textbox(label="Image Caption")
            ocr_output = gr.Textbox(label="Extracted Text")
        img_button = gr.Button("Analyze")

    # Event wiring is registered inside the Blocks context.
    doc_button.click(process_document, inputs=doc_input, outputs=doc_output)
    img_button.click(
        _image_results_for_ui,
        inputs=img_input,
        outputs=[caption_output, ocr_output],
    )

# Mount Gradio app
app = gr.mount_gradio_app(app, demo, path="/")
def redirect_to_interface():
    """Build a redirect response that points at the root path ("/")."""
    # NOTE(review): no route decorator is visible on this function in this
    # view, so it does not appear to be registered with `app` -- confirm.
    target_url = "/"
    return RedirectResponse(url=target_url)