Spaces:

adi-123
/

Image-to-Audio_Story_Generator

Running

adi-123 committed on Oct 31, 2024

Commit

b234ec5

verified ·

1 Parent(s): 5350da8

Update utils.py

Files changed (1) hide show

utils.py CHANGED Viewed

@@ -46,16 +46,12 @@ def txt2story(prompt: str, top_k: int, top_p: float, temperature: float) -> str:
     return story
 # Text-to-speech
-def txt2speech(text: str) -> None:
     print("Initializing text-to-speech conversion...")
-    API_URL = "https://api-inference.huggingface.co/models/espnet/kan-bayashi_ljspeech_vits"
-    headers = {"Authorization": f"Bearer {os.environ['HUGGINGFACEHUB_API_TOKEN']}"}
-    payloads = {'inputs': text}
-    response = requests.post(API_URL, headers=headers, json=payloads)
-    with open('audio_story.mp3', 'wb') as file:
-        file.write(response.content)
 # Get user preferences for the story
 def get_user_preferences() -> Dict[str, str]:

     return story
 # Text-to-speech
+def txt2speech(text: str):
     print("Initializing text-to-speech conversion...")
+    audio_model = pipeline("text-to-speech", model="myshell-ai/MeloTTS-English")
+    speech = captioning_model(text, max_new_tokens=200)[0]["generated_speech"]
+    return speech
 # Get user preferences for the story
 def get_user_preferences() -> Dict[str, str]: