# Hugging Face Space (status when captured: Sleeping)
import os
import tempfile

import soundfile as sf
import streamlit as st
from transformers import pipeline
# Cache models to avoid reloading on every interaction
@st.cache_resource
def load_models():
    """Build the three Hugging Face pipelines used by the app.

    Streamlit re-executes the script on each user interaction, so the
    function is wrapped in ``st.cache_resource``: the pipelines are
    constructed on the first call and the same objects are handed back on
    later calls instead of being rebuilt.

    Returns:
        dict: pipelines keyed by ``"image_to_text"``, ``"story_gen"`` and
        ``"text_to_speech"``.
    """
    return {
        "image_to_text": pipeline("image-to-text", model="Salesforce/blip-image-captioning-base"),
        "story_gen": pipeline("text-generation", model="Qwen/Qwen2.5-1.5B-Instruct"),
        "text_to_speech": pipeline("text-to-speech", model="facebook/mms-tts-eng"),
    }
# Helper functions (one per pipeline stage)
def img2text(url, processor):
    """Return the caption produced for an image.

    Args:
        url: Image reference handed unchanged to ``processor``.
        processor: Callable whose result is indexable, with the first item
            being a mapping that has a ``"generated_text"`` key.

    Returns:
        The ``"generated_text"`` value of the first result.
    """
    results = processor(url)
    return results[0]["generated_text"]
def text2story(text, generator, max_length=150):
    """Generate a short story from a caption.

    Args:
        text: Caption (or any seed text) to build the story around.
        generator: Text-generation callable. It is invoked with the prompt
            plus ``max_length``, ``temperature``, ``do_sample`` and
            ``return_full_text`` keyword arguments and must return an
            indexable whose first item has a ``"generated_text"`` key.
        max_length: Forwarded to ``generator`` as ``max_length``.

    Returns:
        The generated story without the instruction prompt, stripped of
        surrounding whitespace and truncated to at most 1000 characters.
    """
    prompt = f"Create a story based on: {text}"
    outputs = generator(
        prompt,
        max_length=max_length,
        temperature=0.7,
        do_sample=True,
        # Ask for the continuation only; by default the pipeline echoes the
        # prompt at the start of "generated_text".
        return_full_text=False,
    )
    story = outputs[0]["generated_text"]

    # Fallback for generators that ignore return_full_text and still echo
    # the prompt: never show or narrate the instruction itself.
    if story.startswith(prompt):
        story = story[len(prompt):]

    return story.strip()[:1000]  # Safety truncation
def text2audio(text, synthesizer):
    """Synthesize speech for ``text``.

    Args:
        text: Text to be spoken.
        synthesizer: Callable that takes the text and produces the audio
            result.

    Returns:
        Whatever ``synthesizer`` returns, passed through unchanged.
    """
    return synthesizer(text)
# Main application
def main():
    """Run the Streamlit page: uploaded image -> caption -> story -> audio.

    The uploaded image and the synthesized WAV are written inside a
    per-run temporary directory, so concurrent sessions cannot overwrite
    each other's files and everything is removed even when a stage raises.
    """
    # NOTE(review): the page_icon literal looks like a mis-encoded emoji;
    # confirm the intended glyph.
    st.set_page_config(page_title="Your Image to Audio Story", page_icon="π¦")
    st.header("Turn Your Image to Audio Story")

    # Load models once
    models = load_models()

    uploaded_file = st.file_uploader("Select an Image...", type=["jpg", "jpeg", "png"])
    if uploaded_file is None:
        return

    st.image(uploaded_file, caption="Uploaded Image", use_column_width=True)

    # Keep only the extension of the client-supplied name; the rest of the
    # name never becomes part of a filesystem path.
    extension = os.path.splitext(uploaded_file.name)[1]

    with tempfile.TemporaryDirectory() as work_dir:
        # Save uploaded file temporarily
        image_path = os.path.join(work_dir, f"upload{extension}")
        with open(image_path, "wb") as f:
            f.write(uploaded_file.getvalue())

        # Stage 1: Image to Text
        with st.spinner('Generating caption...'):
            scenario = img2text(image_path, models["image_to_text"])
        st.subheader("Image Caption")
        st.write(scenario)

        # Stage 2: Text to Story
        with st.spinner('Creating story...'):
            story = text2story(scenario, models["story_gen"])
        st.subheader("Generated Story")
        st.write(story)

        # Stage 3: Story to Audio
        with st.spinner('Generating audio...'):
            audio = text2audio(story, models["text_to_speech"])
            samples = audio["audio"]
            # soundfile expects (frames, channels). NOTE(review): the TTS
            # pipeline may return channel-first data such as (1, n_samples);
            # transpose in that case so it is not written as one frame with
            # n_samples channels.
            if getattr(samples, "ndim", 1) == 2 and samples.shape[0] < samples.shape[1]:
                samples = samples.T
            audio_path = os.path.join(work_dir, "story.wav")
            sf.write(audio_path, samples, samplerate=audio["sampling_rate"])
            # Read the bytes now so playback does not depend on the file
            # outliving the temporary directory.
            with open(audio_path, "rb") as f:
                audio_bytes = f.read()
        st.subheader("Audio Story")
        st.audio(audio_bytes, format="audio/wav")
# Start the app only when this file is run as the main script, not on import.
if __name__ == "__main__":
    main()