import streamlit as st
from transformers import pipeline

# function part
# img2text: caption the uploaded image with a BLIP image-captioning pipeline
def img2text(url):
    image_to_text_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
    text = image_to_text_model(url)[0]["generated_text"]
    return text


# text2story: expand the caption into a short story with a GPT-2 story model
def text2story(text):
    story_generator = pipeline("text-generation", model="aspis/gpt2-genre-story-generation")
    story_text = story_generator(text, max_length=150, num_return_sequences=1)
    return story_text[0]["generated_text"]


# text2audio: synthesize speech for the story with an MMS text-to-speech model
def text2audio(story_text):
    tts_model = pipeline("text-to-speech", model="facebook/mms-tts-eng")
    audio_data = tts_model(story_text)
    return audio_data
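

# Optional optimization (a sketch, not wired into the app): each helper above
# rebuilds its pipeline on every call, so every Streamlit rerun reloads the
# model weights. Caching the pipelines with st.cache_resource would avoid
# that; the helper name below is an assumption, shown for illustration only.
#
# @st.cache_resource
# def load_pipeline(task, model_name):
#     return pipeline(task, model=model_name)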


# Main part
def main():
    st.set_page_config(page_title="Your Image to Audio Story", page_icon="🦜")
    st.header("Turn Your Image to Audio Story")

    # Keep intermediate results in session state so button clicks and other
    # reruns do not trigger the models again
    if "scenario" not in st.session_state:
        st.session_state.scenario = None
    if "story" not in st.session_state:
        st.session_state.story = None
    if "audio_data" not in st.session_state:
        st.session_state.audio_data = None

    uploaded_file = st.file_uploader("Select an Image...")

    if uploaded_file is not None and st.session_state.scenario is None:
        # Save the upload to disk so the captioning pipeline can read it by path
        bytes_data = uploaded_file.getvalue()
        with open(uploaded_file.name, "wb") as file:
            file.write(bytes_data)
        st.image(uploaded_file, caption="Uploaded Image", use_container_width=True)

        # Stage 1: Image to Text
        st.text('Processing img2text...')
        st.session_state.scenario = img2text(uploaded_file.name)
        st.write(st.session_state.scenario)

        # Stage 2: Text to Story
        st.text('Generating a story...')
        st.session_state.story = text2story(st.session_state.scenario)
        st.write(st.session_state.story)

        # Stage 3: Story to Audio Data
        st.text('Generating audio data...')
        st.session_state.audio_data = text2audio(st.session_state.story)
    elif uploaded_file is not None and st.session_state.scenario:
        # Results already exist: redisplay them without reprocessing
        st.image(uploaded_file, caption="Uploaded Image", use_container_width=True)
        st.write("Image Caption: ", st.session_state.scenario)
        st.write("Generated Story: ", st.session_state.story)

    # Play button (No reprocessing)
    if st.session_state.audio_data and st.button("Play Audio"):
        st.audio(st.session_state.audio_data['audio'],
                 format="audio/wav",
                 start_time=0,
                 sample_rate=st.session_state.audio_data['sampling_rate'])


if __name__ == "__main__":
    main()
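
# Usage note (assumptions, not stated in the file itself): run as a Streamlit
# app, e.g. `streamlit run app.py`, with streamlit and transformers installed
# (the pipelines above also need a backend such as torch). The text-to-speech
# pipeline returns a dict with "audio" and "sampling_rate", which is why
# st.audio above is passed a sample_rate.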