ikraamkb committed on
Commit
fac31c8
·
verified ·
1 Parent(s): 81761c2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -99
app.py CHANGED
@@ -1,123 +1,73 @@
1
  from fastapi import FastAPI
2
  from fastapi.responses import RedirectResponse
3
- import fitz # PyMuPDF
4
- import docx
5
- import pptx
6
- import openpyxl
7
- import io
8
- from PIL import Image
9
  import gradio as gr
 
 
 
 
10
  from transformers import pipeline
11
-
12
# Hugging Face checkpoints backing the two pipelines below.
SUMMARIZATION_CHECKPOINT = "sshleifer/distilbart-cnn-12-6"
CAPTIONING_CHECKPOINT = "nlpconnect/vit-gpt2-image-captioning"

# Both pipelines are created once, at import time, and shared by every call.
summarizer = pipeline(task="summarization", model=SUMMARIZATION_CHECKPOINT)
image_captioner = pipeline(task="image-to-text", model=CAPTIONING_CHECKPOINT)

# FastAPI application object; the Gradio UI is mounted onto it later in the file.
app = FastAPI()
17
 
18
- # -------------------------
19
- # Extraction Functions
20
- # -------------------------
21
 
22
def extract_text_from_pdf(data: bytes):
    """Return the text of every page of a PDF, joined by newlines.

    Args:
        data: Raw bytes of the PDF file.

    Returns:
        The page texts separated by "\\n". If anything raises, a string of
        the form "❌ PDF error: <exception>" is returned instead.
    """
    try:
        with fitz.open(stream=data, filetype="pdf") as pdf:
            page_texts = []
            for page in pdf:
                page_texts.append(page.get_text())
            return "\n".join(page_texts)
    except Exception as exc:
        return f"❌ PDF error: {exc}"
28
 
29
def extract_text_from_docx(data: bytes):
    """Return the non-blank paragraphs of a DOCX file, one per line.

    Args:
        data: Raw bytes of the DOCX file.

    Returns:
        Paragraph texts joined by "\\n" (paragraphs that are empty or
        whitespace-only are skipped). If anything raises, a string of the
        form "❌ DOCX error: <exception>" is returned instead.
    """
    try:
        document = docx.Document(io.BytesIO(data))
        non_blank = [
            para.text for para in document.paragraphs if para.text.strip()
        ]
        return "\n".join(non_blank)
    except Exception as exc:
        return f"❌ DOCX error: {exc}"
35
 
36
def extract_text_from_pptx(data: bytes):
    """Return the text of every text-bearing shape in a PPTX file.

    Args:
        data: Raw bytes of the PPTX file.

    Returns:
        The ``text`` of each shape that has such an attribute, in slide
        order, joined by "\\n". If anything raises, a string of the form
        "❌ PPTX error: <exception>" is returned instead.
    """
    try:
        presentation = pptx.Presentation(io.BytesIO(data))
        return "\n".join(
            shape.text
            for slide in presentation.slides
            for shape in slide.shapes
            if hasattr(shape, "text")
        )
    except Exception as exc:
        return f"❌ PPTX error: {exc}"
47
 
48
def extract_text_from_xlsx(data: bytes):
    """Return the cell contents of an XLSX workbook as plain text.

    Every row of every sheet becomes one output line whose cells are
    separated by single spaces.

    Args:
        data: Raw bytes of the XLSX file.

    Returns:
        The row lines joined by "\\n". If anything raises, a string of the
        form "❌ XLSX error: <exception>" is returned instead.
    """
    try:
        workbook = openpyxl.load_workbook(io.BytesIO(data))
        lines = []
        for sheet_name in workbook.sheetnames:
            worksheet = workbook[sheet_name]
            for row in worksheet.iter_rows(values_only=True):
                # Skip only genuinely empty cells. A plain truthiness test
                # would also discard real values such as 0, 0.0 and False.
                cells = [
                    str(cell)
                    for cell in row
                    if cell is not None and cell != ""
                ]
                lines.append(" ".join(cells))
        return "\n".join(lines)
    except Exception as exc:
        return f"❌ XLSX error: {exc}"
60
 
61
- # -------------------------
62
- # Main Logic
63
- # -------------------------
64
 
65
def summarize_document(file):
    """Summarize an uploaded PDF, DOCX, PPTX or XLSX document.

    Args:
        file: The upload to summarize. Accepts either a filesystem path
            (``str``) or an object exposing ``.name`` and ``.read()``.
            ``None`` means nothing was uploaded.

    Returns:
        A user-facing string: the summary on success, otherwise a short
        message explaining why no summary could be produced. This function
        never raises; unexpected failures are reported in the returned text.
    """
    if file is None:
        return "❗ No file uploaded."

    supported_suffixes = (".pdf", ".docx", ".pptx", ".xlsx")
    # Prefixes the extract_text_from_* helpers use to report a failure.
    extractor_error_prefixes = (
        "❌ PDF error:",
        "❌ DOCX error:",
        "❌ PPTX error:",
        "❌ XLSX error:",
    )

    try:
        is_path = isinstance(file, str)
        filename = (file if is_path else file.name).lower()

        # Reject unknown formats before spending time reading the upload.
        if not filename.endswith(supported_suffixes):
            return "❌ Unsupported file format."

        if is_path:
            with open(file, "rb") as handle:
                data = handle.read()
        else:
            data = file.read()

        if filename.endswith(".pdf"):
            text = extract_text_from_pdf(data)
        elif filename.endswith(".docx"):
            text = extract_text_from_docx(data)
        elif filename.endswith(".pptx"):
            text = extract_text_from_pptx(data)
        else:  # ".xlsx" is the only remaining supported suffix
            text = extract_text_from_xlsx(data)

        if not isinstance(text, str) or not text.strip():
            return "❗ No extractable text."

        # The extractors signal failure by returning an error string. Pass
        # it through instead of feeding the error message to the model.
        if text.startswith(extractor_error_prefixes):
            return text

        # Only the first 3000 characters are summarized.
        summary = summarizer(
            text[:3000], max_length=150, min_length=30, do_sample=False
        )
        return f"📄 Summary:\n{summary[0]['summary_text']}"

    except Exception as e:
        return f"⚠️ Unexpected error: {e}"
89
-
90
def interpret_image(image):
    """Generate a caption for an uploaded image.

    Args:
        image: The image to caption, passed straight to ``image_captioner``.
            ``None`` means nothing was uploaded.

    Returns:
        A user-facing string containing the generated caption, or a message
        describing why no caption could be produced. Never raises.
    """
    if image is None:
        return "❗ No image uploaded."

    try:
        caption = image_captioner(image)[0]["generated_text"]
        return f"🖼️ Caption:\n{caption}"
    except Exception as exc:
        return f"⚠️ Image captioning error: {exc}"
95
-
96
- # -------------------------
97
- # Gradio Interfaces
98
- # -------------------------
99
-
100
# Tab 1: upload a document, get its summary back as text.
doc_summary = gr.Interface(
    fn=summarize_document,
    inputs=gr.File(label="Upload a Document"),
    outputs="text",
    title="📄 Document Summarizer",
)

# Tab 2: upload an image, get a generated caption back as text.
img_caption = gr.Interface(
    fn=interpret_image,
    inputs=gr.Image(type="pil", label="Upload an Image"),
    outputs="text",
    title="🖼️ Image Interpreter",
)

# -------------------------
# FastAPI + Gradio Mount
# -------------------------

# Combine both interfaces into one tabbed UI and mount it at the root path
# of the FastAPI application.
demo = gr.TabbedInterface(
    [doc_summary, img_caption],
    ["Document Summary", "Image Captioning"],
)
app = gr.mount_gradio_app(app, demo, path="/")
120
 
121
@app.get("/")
def root():
    """Answer ``GET /`` with a redirect response pointing at ``/``.

    NOTE(review): the redirect target is the same path this route is
    registered on, and the Gradio UI is mounted at "/" just above; confirm
    whether this handler is reachable and whether the target is intended.
    """
    redirect = RedirectResponse(url="/")
    return redirect
 
1
  from fastapi import FastAPI
2
  from fastapi.responses import RedirectResponse
 
 
 
 
 
 
3
  import gradio as gr
4
+ from PIL import Image
5
+ import io
6
+ import numpy as np
7
+ import easyocr
8
  from transformers import pipeline
9
+ from gtts import gTTS
10
+ import tempfile
11
+ import os
 
12
 
13
# FastAPI application object; the Gradio UI is mounted onto it later in the file.
app = FastAPI()

# Languages for the OCR reader and the checkpoints for the two pipelines.
OCR_LANGUAGES = ['en']
CAPTION_CHECKPOINT = "nlpconnect/vit-gpt2-image-captioning"
VQA_CHECKPOINT = "dandelin/vilt-b32-finetuned-vqa"

# OCR reader, created once at import time with the GPU disabled.
ocr_reader = easyocr.Reader(OCR_LANGUAGES, gpu=False)

# Image captioning and visual-question-answering pipelines, also created
# once at import time and reused for every request.
caption_model = pipeline(task="image-to-text", model=CAPTION_CHECKPOINT)
vqa_model = pipeline(task="visual-question-answering", model=VQA_CHECKPOINT)
 
 
 
21
 
22
def _synthesize_speech(text: str) -> str:
    """Write *text* as speech to a new temporary ``.mp3`` and return its path.

    The temporary file is created and closed first, so gTTS writes to a path
    that no other handle holds open. If synthesis fails, the file is removed
    again and the exception is re-raised.
    """
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
        audio_path = tmp.name

    try:
        gTTS(text=text).save(audio_path)
    except Exception:
        # Do not leave an empty or partial file behind on failure.
        try:
            os.remove(audio_path)
        except OSError:
            pass
        raise
    return audio_path


def process_image_question(image: "Image.Image", question: str):
    """Caption an image, read its text, answer a question and speak the answer.

    Args:
        image: The uploaded image, or ``None`` when nothing was uploaded.
        question: The question to ask about the image.

    Returns:
        A ``(text, audio_path)`` tuple. ``text`` combines the caption, the
        OCR text and the answer (or holds an error message). ``audio_path``
        is the path of an MP3 with the spoken answer, or ``None`` when no
        audio could be produced. Never raises.
    """
    if image is None:
        return "No image uploaded.", None

    try:
        # The OCR reader is given the image as a numpy array.
        np_image = np.array(image)
        ocr_texts = ocr_reader.readtext(np_image, detail=0)
        extracted_text = "\n".join(ocr_texts)

        caption = caption_model(image)[0]['generated_text']

        vqa_result = vqa_model(image=image, question=question)
        answer = vqa_result[0]['answer']

        final_output = (
            f"🖼️ Caption: {caption}\n\n"
            f"📖 OCR Text:\n{extracted_text}\n\n"
            f"❓ Answer: {answer}"
        )
    except Exception as e:
        return f"❌ Error processing image: {e}", None

    # Speech is an extra on top of the text result: if it fails, keep the
    # caption/OCR/answer already computed and just report the missing audio.
    try:
        audio_path = _synthesize_speech(answer)
    except Exception as e:
        final_output += f"\n\n⚠️ Audio unavailable: {e}"
        audio_path = None

    return final_output, audio_path
54
+
55
# Single-page UI: an image plus a question go in, the combined text result
# and the spoken answer come out.
gui = gr.Interface(
    fn=process_image_question,
    inputs=[
        gr.Image(type="pil", label="Upload Image"),
        gr.Textbox(
            lines=2,
            placeholder="Ask a question about the image...",
            label="Question",
        ),
    ],
    outputs=[
        gr.Textbox(label="Result", lines=10),
        gr.Audio(label="Answer (Audio)", type="filepath"),
    ],
    title="🧠 Image QA with Voice",
    description="Upload an image and ask any question. Supports OCR, captioning, visual QA, and audio response.",
)

# Mount the Gradio UI at the root path of the FastAPI application.
app = gr.mount_gradio_app(app, gui, path="/")
 
 
 
 
 
 
 
 
 
 
 
 
70
 
71
@app.get("/")
def home():
    """Answer ``GET /`` with a redirect response pointing at ``/``.

    NOTE(review): the redirect target is the same path this route is
    registered on, and the Gradio UI is mounted at "/" just above; confirm
    whether this handler is reachable and whether the target is intended.
    """
    redirect = RedirectResponse(url="/")
    return redirect