Nepjune committed
Commit 8f2fa48 · verified · 1 Parent(s): eaa3654

Update app.py

Files changed (1)
  1. app.py +43 -12
app.py CHANGED
@@ -3,6 +3,7 @@ import torchaudio
 from torchaudio.transforms import Resample
 import torch
 import gradio as gr
+from flask import Flask, request, jsonify, render_template_string
 
 # Initialize TTS model from Hugging Face
 tts_model_name = "Kamonwan/blip-image-captioning-new"
@@ -13,6 +14,8 @@ model_id = "Kamonwan/blip-image-captioning-new"
 blip_model = BlipForConditionalGeneration.from_pretrained(model_id)
 blip_processor = BlipProcessor.from_pretrained(model_id)
 
+app = Flask(__name__)
+
 def generate_caption(image):
     # Generate caption from image using Blip model
     inputs = blip_processor(images=image, return_tensors="pt")
@@ -27,15 +30,43 @@ def generate_caption(image):
 
     return generated_caption, audio_path
 
-# Create a Gradio interface with an image input, a textbox output, a button, and an audio player
-demo = gr.Interface(
-    fn=generate_caption,
-    inputs=gr.Image(),
-    outputs=[
-        gr.Textbox(label="Generated caption"),
-        gr.Button("Convert to Audio"),
-        gr.Audio(type="filepath", label="Generated Audio")
-    ],
-    live=True
-)
-demo.launch(share=True)
+@app.route('/generate_caption', methods=['POST'])
+def generate_caption_api():
+    image = request.files['image'].read()
+    generated_caption, audio_path = generate_caption(image)
+    return jsonify({'generated_caption': generated_caption, 'audio_path': audio_path})
+
+@app.route('/')
+def index():
+    return render_template_string("""
+    <!DOCTYPE html>
+    <html lang="en">
+    <head>
+        <meta charset="UTF-8">
+        <meta name="viewport" content="width=device-width, initial-scale=1.0">
+        <title>Gradio Interface</title>
+    </head>
+    <body>
+        <h1>Gradio Interface</h1>
+        {{ gr_interface|safe }}
+    </body>
+    </html>
+    """, gr_interface=demo.get_interface())
+
+if __name__ == '__main__':
+    demo = gr.Interface(
+        fn=generate_caption,
+        inputs=gr.Image(),
+        outputs=[
+            gr.Textbox(label="Generated caption"),
+            gr.Button("Convert to Audio"),
+            gr.Audio(type="file", label="Generated Audio")
+        ],
+        live=True
+    )
+
+    # Start Gradio interface
+    demo.launch(share=True)
+
+    # Start Flask app
+    app.run(host='0.0.0.0', port=5000)
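
A caveat on the new `__main__` block: `demo.launch(share=True)` blocks by default, so the `app.run(...)` line after it would never execute, and `gr.Interface` does not appear to expose a `get_interface()` method for the Flask template to call. A minimal non-blocking sketch, assuming `demo` and `app` are constructed as in the diff above and using Gradio's `prevent_thread_lock` launch flag:

```python
# Sketch only: let launch() return instead of blocking, then start Flask.
if __name__ == '__main__':
    demo.launch(share=True, prevent_thread_lock=True)  # Gradio serves in background
    app.run(host='0.0.0.0', port=5000)                 # Flask serves in foreground
```

Embedding the interface in the Flask page would then more plausibly be done with an `<iframe>` pointing at the Gradio URL rather than the `{{ gr_interface|safe }}` placeholder.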
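For quick testing of the new endpoint, a client call could look like the sketch below; the port and the `image` field name come from the Flask code above, while the `requests` library and the sample file name `photo.jpg` are illustrative assumptions.

```python
# Minimal client sketch for the new /generate_caption endpoint.
# Assumes the Flask app from this commit is running on localhost:5000;
# "photo.jpg" is a hypothetical sample image.
import requests

with open("photo.jpg", "rb") as f:
    resp = requests.post(
        "http://localhost:5000/generate_caption",
        files={"image": f},  # field name matches request.files['image']
    )
resp.raise_for_status()
payload = resp.json()
print(payload["generated_caption"])
print(payload["audio_path"])
```

One thing to watch: the route passes the raw bytes from `request.files['image'].read()` straight into `generate_caption`, but `BlipProcessor` generally expects a PIL image, so a `PIL.Image.open(io.BytesIO(...))` conversion on the server side would likely be needed before this endpoint can return a caption.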