Spaces:

jbilcke-hf
/

ai-bedtime-story-server

Paused

jbilcke-hf HF Staff committed on Nov 13, 2023

Commit

38d7590

1 Parent(s): cb57d1e

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -588,30 +588,30 @@ latent_map = {}
 latent_map["Julian"] = get_latents("voices/julian-bedtime-style-1.wav")
 latent_map["Pirate"] = get_latents("voices/pirate_by_coqui.wav")
 # Define the main function for the API endpoint that takes the input text and chatbot role
 def generate_story_and_speech(input_text, chatbot_role):
-    # We assume that other necessary components have been initialized and are ready to use here
-    # Here, we'll integrate the story generation, language detection, and speech synthesis logic
-    # Let's assume `generate_story()` is a function that generates the story based on the input text
-    # And `synthesize_speech()` is a function that synthesizes speech from text
-    story_text = generate_story(input_text, chatbot_role)
-    language = detect_language(story_text)
-    speech_audio_bytes = synthesize_speech(story_text, language)
-    # Convert the speech to base64 to include in the JSON response
     speech_audio_base64 = base64.b64encode(speech_audio_bytes).decode('utf8')
-    # Return the story and speech audio in base64 format
     return {"text": story_text, "audio": speech_audio_base64}
-# Create a Gradio Interface using only the `generate_story_and_speech()` function and the 'json' output type
-demo = gr.Interface(
     fn=generate_story_and_speech,
-    inputs=[gr.Textbox(placeholder="Enter your text here"), gr.Dropdown(choices=ROLES, label="Select Chatbot Role")],
     outputs="json"
 )
-demo.queue()
-demo.launch(debug=True)

 latent_map["Julian"] = get_latents("voices/julian-bedtime-style-1.wav")
 latent_map["Pirate"] = get_latents("voices/pirate_by_coqui.wav")
 # Define the main function for the API endpoint that takes the input text and chatbot role
 def generate_story_and_speech(input_text, chatbot_role):
+    history = [(input_text, None)]  # Initialize history with user input
+    story_text = generate_local(input_text, history)  # calls your generate_local function
+    # Serialize story_text to a single string
+    story_text = ' '.join(sentence for sentence, _ in story_text)
+    synthesized_speech = generate_speech_for_sentence(history, chatbot_role, story_text)
+    # generate_speech_for_sentence returns a tuple, where the second item is a gr.Audio object
+    speech_audio_bytes = synthesized_speech[1].data.getvalue()  # Access the BytesIO object and extract bytes
+    # Convert the speech to base64 to include in JSON response
     speech_audio_base64 = base64.b64encode(speech_audio_bytes).decode('utf8')
+    # Return JSON object with text and base64 audio
     return {"text": story_text, "audio": speech_audio_base64}
+# Define your Gradio app API
+iface = gr.Interface(
     fn=generate_story_and_speech,
+    inputs=[gr.Textbox(label="Enter your text"), gr.Dropdown(choices=ROLES, label="Chatbot Role")],
     outputs="json"
 )
+# Launch the app
+if __name__ == "__main__":
+    iface.launch(debug=True, enable_queue=True, api_mode=True)