Spaces:

adi-123
/

Image-to-Audio_Story_Generator

Running

adi-123 committed on Oct 31, 2024

Commit

c544135

verified ·

1 Parent(s): eebce15

Update utils.py

Files changed (1) hide show

utils.py CHANGED Viewed

@@ -4,6 +4,7 @@ import requests
 from transformers import pipeline
 from typing import Dict
 from together import Together
 # Image-to-text
 def img2txt(url: str) -> str:
@@ -47,15 +48,13 @@ def txt2story(prompt: str, top_k: int, top_p: float, temperature: float) -> str:
 # Text-to-speech
 def txt2speech(text: str) -> None:
-    print("Initializing MeloTTS text-to-speech conversion...")
-    pipe = pipeline("text-to-speech", model="myshell-ai/MeloTTS-English")
     # Generate audio from the text
-    audio = pipe(text)
     # Save the audio output to a file
-    with open("audio_story.wav", "wb") as file:
-        file.write(audio["wav"])
 # Get user preferences for the story
 def get_user_preferences() -> Dict[str, str]:

 from transformers import pipeline
 from typing import Dict
 from together import Together
+from gtts import gTTS
 # Image-to-text
 def img2txt(url: str) -> str:
 # Text-to-speech
 def txt2speech(text: str) -> None:
+    print("Converting text to speech using gTTS...")
     # Generate audio from the text
+    tts = gTTS(text=text, lang='en')
     # Save the audio output to a file
+    tts.save("audio_story.mp3")
 # Get user preferences for the story
 def get_user_preferences() -> Dict[str, str]: