Spaces:
Sleeping
Sleeping
File size: 3,372 Bytes
d4ff40b 348f72a d4ff40b 348f72a d4ff40b 2b97f42 d4ff40b 348f72a d4ff40b 348f72a d4ff40b 348f72a 8d03f81 348f72a 8d03f81 be244f0 d4ff40b 348f72a d4ff40b cd9cb87 d4ff40b cd9cb87 d4ff40b 8d03f81 d4ff40b cd9cb87 d4ff40b cd9cb87 d4ff40b cd9cb87 d4ff40b cd9cb87 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 |
# import part
import os
import re
import string
import tempfile
import textwrap

import numpy as np
import soundfile as sf
import streamlit as st
from PIL import Image
from transformers import pipeline
# Initialize pipelines with caching
@st.cache_resource
def load_pipelines():
    """Build the three Hugging Face pipelines used by the app.

    Decorated with ``st.cache_resource`` so the pipelines are created via
    Streamlit's resource cache instead of on every call.

    Returns:
        tuple: ``(captioner, storyer, tts)`` -- the image-to-text,
        text-generation and text-to-speech pipelines, in that order.
    """
    return (
        pipeline("image-to-text", model="Salesforce/blip-image-captioning-large"),
        pipeline("text-generation", model="gpt2"),
        pipeline("text-to-speech", model="facebook/mms-tts-eng"),
    )


# Module-level handles used by the functions below.
captioner, storyer, tts = load_pipelines()
# Function part

# Runs of characters the story is allowed to contain; anything else acts as a
# separator between tokens.
_ALLOWED_TOKEN = re.compile(r'[a-zA-Z0-9.,!?"\'-]+')

# Upper bound on the number of words kept in the displayed/narrated story.
MAX_STORY_WORDS = 100


# Function to generate content from an image
def generate_content(image):
    """Caption an image, generate a short story about it, and narrate it.

    Writes the caption to the Streamlit page, prompts the text-generation
    pipeline with it, then hands the raw output to ``generate_story`` for
    cleaning, display and speech synthesis.

    Args:
        image: Anything ``PIL.Image.open`` accepts (the uploaded file in
            this app).

    Returns:
        tuple: ``(caption, story, audio_path)``. ``audio_path`` points to a
        temporary WAV file that the caller is responsible for deleting.
    """
    pil_image = Image.open(image)

    # Generate caption
    caption = captioner(pil_image)[0]["generated_text"]
    st.write("**π What's in the picture: π**")
    st.write(caption)

    # Create prompt for story
    prompt = (
        f"Write a funny, interesting children's story centered on this scene: {caption}\n"
        f"Story in third-person narrative, describing this scene exactly: {caption} "
        f"Mention the exact place, location, or venue within {caption}. "
        "Avoid numbers, random letter combinations, and single-letter words."
    )

    # return_full_text=False keeps the prompt itself out of the story.
    raw = storyer(
        prompt,
        max_new_tokens=100,
        temperature=0.6,
        top_p=0.85,
        no_repeat_ngram_size=0,
        return_full_text=False,
    )[0]["generated_text"].strip()

    return generate_story(raw, caption, tts)


def clean_generated_story(raw, max_words=MAX_STORY_WORDS):
    """Strip disallowed characters and single-letter words from ``raw``.

    The text is split into runs of allowed characters (ASCII letters, digits
    and ``. , ! ? " ' -``). A run is kept only if it contains at least two
    letters/digits, which drops single-letter words and punctuation-only
    fragments while leaving sentence punctuation attached to real words.

    Args:
        raw: Text produced by the story generator.
        max_words: Maximum number of words to keep.

    Returns:
        str: The kept words joined by single spaces (``""`` if none remain).
    """
    kept = [
        token
        for token in _ALLOWED_TOKEN.findall(raw)
        if sum(ch.isalnum() for ch in token) > 1
    ]
    return " ".join(kept[:max_words])


def generate_story(raw, caption, tts):
    """Clean the raw story, show it, and synthesize it to a WAV file.

    Args:
        raw: Unprocessed text from the story generator.
        caption: Image caption; returned unchanged and used as the narrated
            text if cleaning leaves the story empty.
        tts: Text-to-speech pipeline; called once per text chunk, and
            ``tts.model.config.sampling_rate`` is used as the WAV sample rate.

    Returns:
        tuple: ``(caption, story, temp_file_path)`` where ``temp_file_path``
        is a temporary ``.wav`` file created with ``delete=False``.

    Raises:
        ValueError: If there is no text at all to narrate.
    """
    story = clean_generated_story(raw, max_words=MAX_STORY_WORDS)
    if not story:
        # The generator can return nothing usable; narrate the caption rather
        # than failing on an empty audio concatenation.
        story = caption

    # Display story in Streamlit
    st.write("**π Your funny story: π**")
    st.write(story)

    # Synthesize in chunks of at most 200 characters, then stitch together.
    chunks = textwrap.wrap(story, width=200)
    if not chunks:
        raise ValueError("No story text available to convert to speech.")
    audio = np.concatenate([tts(chunk)["audio"].squeeze() for chunk in chunks])

    # Reserve a temp file name, close the handle, then write the audio to it;
    # writing by name while the handle is still open fails on some platforms.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
        temp_file_path = temp_file.name
    sf.write(temp_file_path, audio, tts.model.config.sampling_rate)

    return caption, story, temp_file_path
# Streamlit UI
st.title("πStory Maker")
st.markdown("Upload a picture, I will generate a story for you")
uploaded_image = st.file_uploader("Choose your picture", type=["jpg", "jpeg", "png"])

# Show a placeholder until the user has uploaded a picture of their own.
if uploaded_image is None:
    st.image(
        "https://example.com/placeholder_image.jpg",
        caption="Upload your picture here!",
        use_container_width=True,
    )
else:
    st.image(uploaded_image, caption="Your Picture ", use_container_width=True)

if st.button("Generate a story"):
    if uploaded_image is None:
        st.warning("Please upload a picture first! ")
    else:
        with st.spinner("Processing"):
            caption, story, audio_path = generate_content(uploaded_image)
            # Load the narration into memory and always delete the temp file,
            # so the player does not depend on a file that is already gone and
            # nothing is left behind if a later step fails.
            try:
                with open(audio_path, "rb") as audio_file:
                    audio_bytes = audio_file.read()
            finally:
                os.remove(audio_path)
            st.success(" Your story is ready!π")
            st.audio(audio_bytes, format="audio/wav")