xujinheng666 committed
Commit 839ccc8 · verified · 1 Parent(s): 44dd12d

Update app.py

Files changed (1)
  app.py +15 -16
app.py CHANGED
@@ -1,5 +1,7 @@
 import streamlit as st
 from transformers import pipeline
+from gtts import gTTS
+import os
 
 # function part
 # img2text
@@ -14,12 +16,11 @@ def text2story(text):
     story_text = story_generator(text, max_length=150, num_return_sequences=1)
     return story_text[0]["generated_text"]
 
-# text2audio
-def text2audio(story_text):
-    tts_model = pipeline("text-to-speech", model="facebook/mms-tts-eng")
-    audio_data = tts_model(story_text)
-    return audio_data
-
+# text2audio using gTTS
+def text2audio(story_text, filename="output.mp3"):
+    tts = gTTS(text=story_text, lang='en')
+    tts.save(filename)
+    return filename
 
 # Main part
 def main():
@@ -30,8 +31,8 @@ def main():
         st.session_state.scenario = None
     if "story" not in st.session_state:
         st.session_state.story = None
-    if "audio_data" not in st.session_state:
-        st.session_state.audio_data = None
+    if "audio_file" not in st.session_state:
+        st.session_state.audio_file = None
 
     uploaded_file = st.file_uploader("Select an Image...")
 
@@ -53,9 +54,10 @@
            st.session_state.story = text2story(st.session_state.scenario)
            st.write(st.session_state.story)
 
-           # Stage 3: Story to Audio Data
-           st.text('Generating audio data...')
-           st.session_state.audio_data = text2audio(st.session_state.story)
+           # Stage 3: Story to Audio File
+           st.text('Generating audio...')
+           audio_filename = text2audio(st.session_state.story)
+           st.session_state.audio_file = audio_filename
 
    elif st.session_state.scenario:
        st.image(uploaded_file, caption="Uploaded Image", use_container_width=True)
@@ -63,11 +65,8 @@
        st.write("Generated Story: ", st.session_state.story)
 
        # Play button (No reprocessing)
-       if st.session_state.audio_data and st.button("Play Audio"):
-           st.audio(st.session_state.audio_data['audio'],
-                    format="audio/wav",
-                    start_time=0,
-                    sample_rate=st.session_state.audio_data['sampling_rate'])
+       if st.session_state.audio_file and st.button("Play Audio"):
+           st.audio(st.session_state.audio_file, format="audio/mp3")
 
 if __name__ == "__main__":
     main()
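
For reference, a minimal standalone sketch of the gTTS flow this commit switches to (replacing the facebook/mms-tts-eng pipeline). The example story text is an illustrative assumption, not part of the app; the function body mirrors the new text2audio above.

from gtts import gTTS
import streamlit as st

# Convert text to an MP3 file with Google Text-to-Speech (gTTS).
def text2audio(story_text, filename="output.mp3"):
    tts = gTTS(text=story_text, lang='en')  # gTTS calls Google's TTS service, so network access is required
    tts.save(filename)                       # write the synthesized speech to an MP3 file on disk
    return filename

# Illustrative usage inside a Streamlit script (run with `streamlit run app.py`);
# the story text below is a placeholder, not output from the app.
audio_file = text2audio("Once upon a time, a curious robot set out to paint the sunrise.")
st.audio(audio_file, format="audio/mp3")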