Spaces:
Running
Running
Update appImage.py
Browse files- appImage.py +34 -7
appImage.py
CHANGED
@@ -1,27 +1,54 @@
|
|
1 |
import gradio as gr
|
2 |
-
from transformers import pipeline
|
3 |
import easyocr
|
4 |
from fastapi import FastAPI
|
5 |
-
from fastapi.responses import RedirectResponse
|
6 |
import tempfile
|
7 |
import os
|
8 |
from gtts import gTTS
|
9 |
from fpdf import FPDF
|
10 |
import datetime
|
|
|
|
|
11 |
|
12 |
# Initialize components
|
13 |
app = FastAPI()
|
14 |
|
15 |
-
# Load models
|
16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
reader = easyocr.Reader(['en', 'fr']) # English and French OCR
|
18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
def analyze_image(image_path):
|
20 |
"""Process image with both captioning and OCR"""
|
21 |
try:
|
22 |
# Generate image caption
|
23 |
-
|
24 |
-
caption = caption_result[0]['generated_text']
|
25 |
|
26 |
# Extract text with EasyOCR
|
27 |
ocr_result = reader.readtext(image_path, detail=0)
|
@@ -167,4 +194,4 @@ app = gr.mount_gradio_app(app, demo, path="/")
|
|
167 |
|
168 |
@app.get("/")
def redirect_to_interface():
    """Answer GET requests for "/" with a redirect to the interface URL."""
    # NOTE(review): the redirect target is "/" — the same path this route is
    # registered on — and the surrounding diff context shows the Gradio app
    # mounted at path="/" as well. Confirm whether this handler is ever
    # reached and whether a different target was intended.
    interface_url = "/"
    return RedirectResponse(url=interface_url)
|
|
|
1 |
import gradio as gr
|
2 |
+
from transformers import AutoProcessor, AutoModelForCausalLM, pipeline
|
3 |
import easyocr
|
4 |
from fastapi import FastAPI
|
5 |
+
from fastapi.responses import RedirectResponse, FileResponse, JSONResponse
|
6 |
import tempfile
|
7 |
import os
|
8 |
from gtts import gTTS
|
9 |
from fpdf import FPDF
|
10 |
import datetime
|
11 |
+
from PIL import Image
|
12 |
+
import torch
|
13 |
|
14 |
# Initialize components
app = FastAPI()

# Load the captioning model. microsoft/git-large-coco is tried first; if
# loading it raises for any reason, the smaller ViT-GPT2 captioning pipeline
# is loaded instead. USE_GIT records which of the two is active so that
# callers know whether to use (processor, git_model) or captioner.
try:
    processor = AutoProcessor.from_pretrained("microsoft/git-large-coco")
    git_model = AutoModelForCausalLM.from_pretrained("microsoft/git-large-coco")
    print("Successfully loaded microsoft/git-large-coco model")
    USE_GIT = True
except Exception as load_error:
    print(f"Failed to load GIT model: {load_error}. Falling back to smaller model")
    captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
    USE_GIT = False

# Initialize EasyOCR with English and French recognition
reader = easyocr.Reader(['en', 'fr'])
|
31 |
|
32 |
+
def generate_caption(image_path):
    """Generate a caption for an image using the best available model.

    When ``USE_GIT`` is true the image is run through the GIT
    ``processor``/``git_model`` pair; otherwise the fallback ``captioner``
    pipeline is used.

    Args:
        image_path: Location of the image to caption (presumably a
            filesystem path; it is handed to ``Image.open`` or to the
            ``captioner`` pipeline unchanged).

    Returns:
        The generated caption string, or ``"Could not generate caption"``
        if any step raises.
    """
    try:
        if not USE_GIT:
            result = captioner(image_path)
            return result[0]['generated_text']

        # Image.open leaves the underlying file open until the image is
        # closed; the context manager releases the handle as soon as the
        # processor has turned the pixels into tensors.
        with Image.open(image_path) as image:
            inputs = processor(images=image, return_tensors="pt")
        outputs = git_model.generate(**inputs, max_length=50)
        return processor.batch_decode(outputs, skip_special_tokens=True)[0]
    except Exception as e:
        # Deliberate best-effort: a captioning failure is reported on stdout
        # and replaced by a placeholder instead of propagating to the caller.
        print(f"Caption generation error: {e}")
        return "Could not generate caption"
|
46 |
+
|
47 |
def analyze_image(image_path):
|
48 |
"""Process image with both captioning and OCR"""
|
49 |
try:
|
50 |
# Generate image caption
|
51 |
+
caption = generate_caption(image_path)
|
|
|
52 |
|
53 |
# Extract text with EasyOCR
|
54 |
ocr_result = reader.readtext(image_path, detail=0)
|
|
|
194 |
|
195 |
@app.get("/")
def redirect_to_interface():
    """Answer GET requests for "/" with a redirect to the interface URL."""
    # NOTE(review): the redirect target is "/" — the same path this route is
    # registered on — and the surrounding diff context shows the Gradio app
    # mounted at path="/" as well. Confirm whether this handler is ever
    # reached and whether a different target was intended.
    interface_url = "/"
    return RedirectResponse(url=interface_url)
|