# NOTE: non-code residue preceding the script, kept as a comment: "Spaces: Runtime error"
# import part
import contextlib
import os
import string
import tempfile
import textwrap

import numpy as np
import soundfile as sf
import streamlit as st
from PIL import Image
from transformers import pipeline
# Initialize pipelines with caching
@st.cache_resource
def load_pipelines():
    """Build the three Hugging Face pipelines used by the app.

    Streamlit re-executes the whole script on every widget interaction.
    ``st.cache_resource`` keeps the returned pipeline objects alive across
    those reruns so the models are only constructed once per process.

    Returns:
        tuple: ``(captioner, storyer, tts)`` -- the image-to-text,
        text-generation and text-to-speech pipelines, in that order.
    """
    captioner = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
    storyer = pipeline("text-generation", model="aspis/gpt2-genre-story-generation")
    tts = pipeline("text-to-speech", model="facebook/mms-tts-eng")
    return captioner, storyer, tts


# Module-level handles used by generate_content().
captioner, storyer, tts = load_pipelines()
# Function part
# Characters kept in the story text; everything else (symbols like * and ~)
# is dropped. A frozenset gives O(1) membership tests per character.
_ALLOWED_STORY_CHARS = frozenset(string.ascii_letters + string.digits + " .,!?\"'-")


def _clean_story(raw, max_words=100):
    """Drop disallowed characters from *raw* and trim it to *max_words* words."""
    kept = "".join(ch for ch in raw if ch in _ALLOWED_STORY_CHARS)
    return " ".join(kept.split()[:max_words])


# Function to generate content from an image
def generate_content(image):
    """Caption an image, write a short story about it, and narrate the story.

    The caption and the story are also written to the Streamlit page as a
    side effect.

    Args:
        image: Anything ``PIL.Image.open`` accepts (a path or a binary
            file-like object such as a Streamlit upload).

    Returns:
        tuple: ``(caption, story, temp_file_path)`` where ``temp_file_path``
        is a ``.wav`` file created with ``delete=False``; the caller is
        responsible for removing it.

    Raises:
        ValueError: If neither the generated text nor the caption contains
            any speakable characters, so there is nothing to narrate.
    """
    # Rewind file-like inputs in case an earlier reader left the position
    # somewhere other than the start.
    if hasattr(image, "seek"):
        image.seek(0)
    pil_image = Image.open(image)

    # Generate caption
    caption = captioner(pil_image)[0]["generated_text"]
    st.write("**🌟 What's in the picture: 🌟**")
    st.write(caption)

    # Create prompt for story. The fragments are separated by spaces and the
    # "Story:" cue comes last, so the model continues from the cue instead of
    # from the middle of the instructions.
    prompt = (
        f"Write a funny, interesting children's story that precisely centered on this scene {caption} "
        f"in third-person narrative, that describes this scene exactly: {caption} "
        f"mention the exact place, location or venue within {caption}\nStory:"
    )

    # Generate raw story
    raw = storyer(
        prompt,
        max_new_tokens=150,
        # Explicit so temperature/top_p are honored regardless of whatever
        # default the model's generation config carries.
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        no_repeat_ngram_size=2,
        return_full_text=False,
    )[0]["generated_text"].strip()

    # Clean and trim to 100 words; fall back to the caption when the model
    # produced nothing usable, so the TTS step always has text to read.
    story = _clean_story(raw) or _clean_story(caption)
    if not story:
        raise ValueError("No speakable text could be generated for this image.")
    st.write("**📖 Your funny story: 📖**")
    st.write(story)

    # Generate audio from cleaned story, one chunk at a time.
    chunks = textwrap.wrap(story, width=200)
    # reshape(-1) flattens like squeeze() but never yields a 0-d array,
    # which np.concatenate would reject.
    audio = np.concatenate(
        [np.asarray(tts(chunk)["audio"]).reshape(-1) for chunk in chunks]
    )

    # Reserve a temp path and close the handle *before* writing by name:
    # reopening a still-open NamedTemporaryFile is not portable.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
        temp_file_path = temp_file.name
    try:
        sf.write(temp_file_path, audio, tts.model.config.sampling_rate)
    except Exception:
        # Do not leave an orphaned temp file behind when the write fails.
        os.remove(temp_file_path)
        raise

    return caption, story, temp_file_path
# Streamlit UI
st.title("✨ Magic Story Maker ✨")
st.markdown("Upload a picture to make a funny story and hear it too! 📸")
uploaded_image = st.file_uploader("Choose your picture", type=["jpg", "jpeg", "png"])

# Streamlit UI (modified image display section)
if uploaded_image is None:
    # NOTE(review): this URL looks like a stand-in; confirm it resolves to a
    # real image or swap in a bundled asset.
    st.image("https://example.com/placeholder_image.jpg", caption="Upload your picture here! 📷", use_container_width=True)
else:
    st.image(uploaded_image, caption="Your Picture 🌟", use_container_width=True)

if st.button("✨ Make My Story! ✨"):
    if uploaded_image is None:
        st.warning("Please upload a picture first! 📸")
    else:
        with st.spinner("🔮 Creating your magical story..."):
            caption, story, audio_path = generate_content(uploaded_image)

        # Load the narration into memory and always delete the temp file,
        # even if reading fails, so repeated runs do not pile up .wav files.
        try:
            with open(audio_path, "rb") as audio_file:
                audio_bytes = audio_file.read()
        finally:
            with contextlib.suppress(FileNotFoundError):
                os.remove(audio_path)

        st.success("🎉 Your story is ready! 🎉")
        st.audio(audio_bytes, format="audio/wav")