Spaces:

ikraamkb
/

Summarization

Running

App Files Files Community

ikraamkb committed 11 days ago

Commit

5e30a65

verified ·

1 Parent(s): ea282f7

Update app.py

Browse files

Files changed (1) hide show

app.py +50 -42

app.py CHANGED Viewed

@@ -1,7 +1,7 @@
-from fastapi import FastAPI, UploadFile, File, Form
 from fastapi.responses import RedirectResponse
 import gradio as gr
-from transformers import pipeline
 import tempfile
 import os
 from PIL import Image
@@ -11,13 +11,27 @@ import openpyxl
 from pptx import Presentation
 import easyocr
-# Initialize models
-summarizer = pipeline("text2text-generation", model="FeruzaBoynazarovaas/my_awesome_billsum_model")
-captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
-reader = easyocr.Reader(['en'])  # For OCR
 app = FastAPI()
 def extract_text_from_file(file_path: str, file_type: str):
     """Extract text from different document formats"""
     try:
@@ -39,59 +53,53 @@ def extract_text_from_file(file_path: str, file_type: str):
         return f"Error reading file: {str(e)}"
 def process_document(file):
-    """Handle document upload and summarization"""
-    # Save temp file
-    file_ext = os.path.splitext(file.name)[1][1:].lower()
-    with tempfile.NamedTemporaryFile(delete=False, suffix=f".{file_ext}") as tmp:
-        tmp.write(file.read())
-        tmp_path = tmp.name
-    # Extract and summarize
-    text = extract_text_from_file(tmp_path, file_ext)
-    summary = summarizer(text, max_length=150, min_length=30, do_sample=False)[0]['generated_text']
-    # Cleanup
-    os.unlink(tmp_path)
-    return summary
 def process_image(image):
-    """Handle image captioning and OCR"""
-    img = Image.open(image)
-    # Get caption
-    caption = captioner(img)[0]['generated_text']
-    # Get OCR text
-    ocr_result = reader.readtext(img)
-    ocr_text = " ".join([res[1] for res in ocr_result])
-    return {
-        "caption": caption,
-        "ocr_text": ocr_text if ocr_text else "No readable text found"
-    }
 # Gradio Interface
 with gr.Blocks() as demo:
-    gr.Markdown("# 📄 Document & Image Analysis Web Service")
     with gr.Tab("Document Summarization"):
-        doc_input = gr.File(label="Upload Document (PDF, DOCX, PPTX, XLSX)")
         doc_output = gr.Textbox(label="Summary")
         doc_button = gr.Button("Summarize")
     with gr.Tab("Image Analysis"):
         img_input = gr.Image(type="filepath", label="Upload Image")
-        with gr.Accordion("Results", open=True):
-            caption_output = gr.Textbox(label="Image Caption")
-            ocr_output = gr.Textbox(label="Extracted Text")
         img_button = gr.Button("Analyze")
     doc_button.click(process_document, inputs=doc_input, outputs=doc_output)
     img_button.click(process_image, inputs=img_input, outputs=[caption_output, ocr_output])
-# Mount Gradio app
 app = gr.mount_gradio_app(app, demo, path="/")
 @app.get("/")
-def redirect_to_gradio():
     return RedirectResponse(url="/")

+from fastapi import FastAPI, UploadFile, File
 from fastapi.responses import RedirectResponse
 import gradio as gr
+from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
 import tempfile
 import os
 from PIL import Image
 from pptx import Presentation
 import easyocr
 app = FastAPI()
# Model setup.
# The summarizer is assembled from an explicitly loaded tokenizer and model.
# If any step of that path raises, the error is printed and a stock
# checkpoint is used instead so the app can still start.
try:
    tokenizer = AutoTokenizer.from_pretrained(
        "FeruzaBoynazarovaas/my_awesome_billsum_model",
        use_fast=False,
    )
    model = AutoModelForSeq2SeqLM.from_pretrained(
        "FeruzaBoynazarovaas/my_awesome_billsum_model"
    )
    summarizer = pipeline("text2text-generation", model=model, tokenizer=tokenizer)
except Exception as e:
    print(f"Error loading summarizer: {e}")
    summarizer = pipeline("text2text-generation", model="t5-small")

# Image captioning pipeline and English OCR reader; loaded without a fallback.
captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
reader = easyocr.Reader(['en'])
 def extract_text_from_file(file_path: str, file_type: str):
     """Extract text from different document formats"""
     try:
         return f"Error reading file: {str(e)}"
 def process_document(file):
     """Summarize an uploaded document and return the summary text.

     Args:
         file: The upload handed over by the ``gr.File`` input. Either an
             object exposing ``.name`` (and optionally ``.read()``) or a plain
             path string is accepted; ``None`` means nothing was uploaded.

     Returns:
         The generated summary, or a human-readable error string. Errors are
         returned rather than raised so they show up in the output textbox.
     """
     if file is None:
         return "Processing error: no file uploaded"
     tmp_path = None
     try:
         src_name = getattr(file, "name", file)
         file_ext = os.path.splitext(src_name)[1][1:].lower()
         # Prefer the object's own read(); fall back to reading the path when
         # the upload arrives as a bare path string.
         if hasattr(file, "read"):
             payload = file.read()
         else:
             with open(src_name, "rb") as src:
                 payload = src.read()
         with tempfile.NamedTemporaryFile(delete=False, suffix=f".{file_ext}") as tmp:
             # Record the path first so cleanup still runs if the write fails.
             tmp_path = tmp.name
             tmp.write(payload)
         text = extract_text_from_file(tmp_path, file_ext)
         if not isinstance(text, str) or not text.strip():
             return "Processing error: no text could be extracted from the document"
         # extract_text_from_file reports failures as a string with this
         # prefix; surface it instead of summarizing the error message.
         if text.startswith("Error reading file:"):
             return text
         return summarizer(text, max_length=150, min_length=30, do_sample=False)[0]['generated_text']
     except Exception as e:
         return f"Processing error: {str(e)}"
     finally:
         # Always remove the temporary copy, including on failure paths.
         if tmp_path is not None and os.path.exists(tmp_path):
             os.unlink(tmp_path)
 def process_image(image):
     """Caption an image and extract any text found in it.

     Args:
         image: Path of the uploaded image, as supplied by the
             ``gr.Image(type="filepath")`` input; falsy when nothing was
             uploaded.

     Returns:
         A ``(caption, ocr_text)`` tuple, one value per output textbox bound
         to this handler. On failure the first element carries the error
         message and the second is empty.
     """
     if not image:
         return "No image provided", ""
     try:
         # Close the image handle as soon as the caption has been produced.
         with Image.open(image) as img:
             caption = captioner(img)[0]['generated_text']
         # Give the OCR reader the file path rather than the PIL object.
         ocr_result = reader.readtext(image)
         ocr_text = " ".join(res[1] for res in ocr_result)
         return caption, (ocr_text if ocr_text else "No readable text found")
     except Exception as e:
         return f"Error: {e}", ""
 # Gradio UI: one tab per feature, each with its own inputs, outputs and
 # trigger button.
 with gr.Blocks() as demo:
     gr.Markdown("# 📄 Document & Image Analysis")

     with gr.Tab("Document Summarization"):
         doc_input = gr.File(label="Upload Document")
         doc_output = gr.Textbox(label="Summary")
         doc_button = gr.Button("Summarize")

     with gr.Tab("Image Analysis"):
         img_input = gr.Image(type="filepath", label="Upload Image")
         caption_output = gr.Textbox(label="Image Caption")
         ocr_output = gr.Textbox(label="Extracted Text")
         img_button = gr.Button("Analyze")

     # Connect each button to its handler and the components it reads/writes.
     doc_button.click(
         fn=process_document,
         inputs=doc_input,
         outputs=doc_output,
     )
     img_button.click(
         fn=process_image,
         inputs=img_input,
         outputs=[caption_output, ocr_output],
     )
 # Attach the Gradio UI to the FastAPI application at the root path.
 app = gr.mount_gradio_app(app, demo, path="/")


 @app.get("/")
 def redirect():
     """Answer GET / with a redirect to "/".

     NOTE(review): the target is the same path this route is registered on,
     and the Gradio app is already mounted at "/" - confirm whether this
     handler is still needed.
     """
     return RedirectResponse("/")