Spaces: Build error
import torch
import numpy as np
import scipy.io.wavfile as wavfile
import gradio as gr
from PIL import Image
from transformers import pipeline
# Image-to-text pipeline: generates a caption for the uploaded image
pipe = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")

# Text-to-speech pipeline: narrates the generated caption
narrator = pipeline("text-to-speech", model="kakao-enterprise/vits-ljs")
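# For reference, the text-to-speech pipeline returns a dict roughly of the form
# {'audio': array of shape (1, num_samples), 'sampling_rate': 22050}. The exact
# shape and rate here are assumptions based on the standard transformers TTS
# output, which is why generate_audio() below indexes ['audio'][0].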
def generate_audio(text):
    # Synthesize speech and write it to a WAV file that Gradio can play back
    narrated_text = narrator(text)
    wavfile.write("output.wav", rate=narrated_text['sampling_rate'],
                  data=narrated_text['audio'][0])
    return 'output.wav'
def caption_my_image(image):
    # Gradio passes uploads as NumPy arrays by default; convert to a PIL Image
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)
    elif not isinstance(image, Image.Image):
        raise TypeError("Unsupported image format. Please upload a valid image.")
    image = image.convert('RGB')
    caption = pipe(image)
    final_caption = caption[0]['generated_text']
    return generate_audio(final_caption)
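# Quick local sanity check (hypothetical file path; point it at any image you have):
# caption_my_image(Image.open("sample.jpg"))  # should write and return 'output.wav'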
demo = gr.Interface(fn=caption_my_image,
                    inputs=[gr.Image(label='Upload an image to hear the story behind it')],
                    outputs=[gr.Audio(label='Narration of the image')],
                    title="Real-Time Image Narration",
                    description="Upload an image and this app will narrate a description of it."
                    )
demo.launch(share=True, debug=True)
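A "Build error" on Spaces usually means a dependency failed to install rather than a problem in app.py itself. A minimal requirements.txt sketch covering the imports above (package list inferred from the code; nothing here is pinned or tested):

transformers
torch
gradio
scipy
numpy
Pillow

If the VITS tokenizer then complains about phonemization at runtime, the phonemizer package (plus the espeak-ng system package via packages.txt) may also be needed; treat that as an assumption to verify against the model card.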