someshb07 committed on
Commit 7a0d264 · verified · 1 Parent(s): 5040f84

Create app.py

Files changed (1)
  1. app.py +45 -0
app.py ADDED
@@ -0,0 +1,45 @@
import torch
import gradio as gr
from PIL import Image
import scipy.io.wavfile as wavfile
import numpy as np

# Use pipelines as high-level helpers
from transformers import pipeline

# Image-to-text model for generating captions
pipe = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")

# Text-to-speech model for narrating the captions
narrator = pipeline("text-to-speech", model="kakao-enterprise/vits-ljs")


def generate_audio(text):
    # Synthesize speech and save the waveform as a WAV file
    narrated_text = narrator(text)
    wavfile.write("output.wav", rate=narrated_text["sampling_rate"],
                  data=narrated_text["audio"][0])
    return "output.wav"


def caption_my_image(image):
    # Gradio may pass the upload as a NumPy array; convert it to a PIL Image
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)
    elif not isinstance(image, Image.Image):
        raise TypeError("Unsupported image format. Please upload a valid image.")

    image = image.convert("RGB")
    caption = pipe(image)
    final_caption = caption[0]["generated_text"]
    return generate_audio(final_caption)


demo = gr.Interface(fn=caption_my_image,
                    inputs=[gr.Image(label="Upload an image to hear the story behind it")],
                    outputs=[gr.Audio(label="Play the narration of the image")],
                    title="Real-Time Image Narration",
                    description="Generates and narrates a description of the uploaded image.")

demo.launch(share=True, debug=True)
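The app chains the BLIP captioning pipeline into the VITS text-to-speech pipeline, so the whole flow can also be exercised without the web UI. A minimal smoke-test sketch, assuming the definitions above are available in the current session (e.g. with the `demo.launch(...)` call commented out) and a hypothetical image file `example.jpg` in the working directory:

from PIL import Image

# Run the full caption-then-narrate chain on one local image.
img = Image.open("example.jpg")
wav_path = caption_my_image(img)  # writes output.wav and returns its path
print(f"Narration saved to {wav_path}")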