ikraamkb committed
Commit 9af5fdb · verified · 1 Parent(s): 6d798ab

Update appImage.py

Files changed (1):
  1. appImage.py +34 -7
appImage.py CHANGED
@@ -1,27 +1,54 @@
 import gradio as gr
-from transformers import pipeline
+from transformers import AutoProcessor, AutoModelForCausalLM, pipeline
 import easyocr
 from fastapi import FastAPI
-from fastapi.responses import RedirectResponse
+from fastapi.responses import RedirectResponse, FileResponse, JSONResponse
 import tempfile
 import os
 from gtts import gTTS
 from fpdf import FPDF
 import datetime
+from PIL import Image
+import torch
 
 # Initialize components
 app = FastAPI()
 
-# Load models
-captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
+# Load models - Using microsoft/git-large-coco
+try:
+    # Try loading the better model first
+    processor = AutoProcessor.from_pretrained("microsoft/git-large-coco")
+    git_model = AutoModelForCausalLM.from_pretrained("microsoft/git-large-coco")
+    print("Successfully loaded microsoft/git-large-coco model")
+    USE_GIT = True
+except Exception as e:
+    print(f"Failed to load GIT model: {e}. Falling back to smaller model")
+    captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
+    USE_GIT = False
+
+# Initialize EasyOCR
 reader = easyocr.Reader(['en', 'fr'])  # English and French OCR
 
+def generate_caption(image_path):
+    """Generate caption using the best available model"""
+    try:
+        if USE_GIT:
+            image = Image.open(image_path)
+            inputs = processor(images=image, return_tensors="pt")
+            outputs = git_model.generate(**inputs, max_length=50)
+            return processor.batch_decode(outputs, skip_special_tokens=True)[0]
+        else:
+            result = captioner(image_path)
+            return result[0]['generated_text']
+    except Exception as e:
+        print(f"Caption generation error: {e}")
+        return "Could not generate caption"
+
 def analyze_image(image_path):
     """Process image with both captioning and OCR"""
     try:
         # Generate image caption
-        caption_result = captioner(image_path)
-        caption = caption_result[0]['generated_text']
+        caption = generate_caption(image_path)
 
         # Extract text with EasyOCR
         ocr_result = reader.readtext(image_path, detail=0)
@@ -167,4 +194,4 @@ app = gr.mount_gradio_app(app, demo, path="/")
 
 @app.get("/")
 def redirect_to_interface():
-    return RedirectResponse(url="/")
+    return RedirectResponse(url="/")
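
For reference, a minimal sketch (not part of the commit) of how the new generate_caption() helper could be exercised on its own, assuming appImage.py is importable from the working directory and that a local test image exists at sample.jpg (hypothetical path):

# Importing appImage also initializes the FastAPI app, the caption model, and EasyOCR.
from appImage import generate_caption

# Uses microsoft/git-large-coco when it loaded successfully (USE_GIT = True),
# otherwise falls back to the nlpconnect/vit-gpt2-image-captioning pipeline.
print(generate_caption("sample.jpg"))  # hypothetical test image path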