someshb07 committed on
Commit 7a0d264 · verified · 1 Parent(s): 5040f84

Create app.py

Files changed (1)
  1. app.py +45 -0
app.py ADDED
@@ -0,0 +1,45 @@
import torch
import gradio as gr
from PIL import Image
import scipy.io.wavfile as wavfile
import numpy as np

# Use pipelines as high-level helpers
from transformers import pipeline

# Image-to-text model for generating captions
pipe = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")

# Text-to-speech model for narrating the captions
narrator = pipeline("text-to-speech", model="kakao-enterprise/vits-ljs")


def generate_audio(text):
    # Synthesize speech and save the waveform as a WAV file
    narrated_text = narrator(text)
    wavfile.write("output.wav", rate=narrated_text["sampling_rate"],
                  data=narrated_text["audio"][0])
    return "output.wav"


def caption_my_image(image):
    # Gradio may pass the upload as a NumPy array; convert it to a PIL Image
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)
    elif not isinstance(image, Image.Image):
        raise TypeError("Unsupported image format. Please upload a valid image.")

    image = image.convert("RGB")
    caption = pipe(image)
    final_caption = caption[0]["generated_text"]
    return generate_audio(final_caption)


demo = gr.Interface(fn=caption_my_image,
                    inputs=[gr.Image(label="Upload an image to hear the story behind it")],
                    outputs=[gr.Audio(label="Play the narration of the image")],
                    title="Real-Time Image Narration",
                    description="Generates and narrates a description of the uploaded image.")

demo.launch(share=True, debug=True)
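The app chains the BLIP captioning pipeline into the VITS text-to-speech pipeline, so the whole flow can also be exercised without the web UI. A minimal smoke-test sketch, assuming the definitions above are available in the current session (e.g. with the `demo.launch(...)` call commented out) and a hypothetical image file `example.jpg` in the working directory:

from PIL import Image

# Run the full caption-then-narrate chain on one local image.
img = Image.open("example.jpg")
wav_path = caption_my_image(img)  # writes output.wav and returns its path
print(f"Narration saved to {wav_path}")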