File size: 3,372 Bytes
d4ff40b
348f72a
 
d4ff40b
 
 
 
 
 
 
348f72a
d4ff40b
 
 
 
2b97f42
d4ff40b
 
348f72a
d4ff40b
348f72a
d4ff40b
 
 
 
 
 
 
 
 
348f72a
8d03f81
 
 
 
 
 
348f72a
8d03f81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
be244f0
 
 
 
 
 
 
 
 
 
 
d4ff40b
 
 
 
348f72a
d4ff40b
 
 
 
 
 
 
 
cd9cb87
 
d4ff40b
 
 
 
 
cd9cb87
d4ff40b
8d03f81
d4ff40b
cd9cb87
d4ff40b
cd9cb87
d4ff40b
cd9cb87
d4ff40b
 
 
cd9cb87
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# import part
import os
import re
import string
import tempfile
import textwrap

import numpy as np
import soundfile as sf
import streamlit as st
from PIL import Image
from transformers import pipeline

# Initialize pipelines with caching
@st.cache_resource
def load_pipelines():
    """Build the three model pipelines the app relies on.

    The function is wrapped in ``st.cache_resource`` so the pipelines are
    created through Streamlit's resource cache rather than on every call.

    Returns:
        A ``(captioner, storyer, tts)`` tuple: an image-to-text pipeline,
        a text-generation pipeline and a text-to-speech pipeline, in that
        order.
    """
    return (
        pipeline("image-to-text", model="Salesforce/blip-image-captioning-large"),
        pipeline("text-generation", model="gpt2"),
        pipeline("text-to-speech", model="facebook/mms-tts-eng"),
    )


# Module-level handles shared by the functions below.
captioner, storyer, tts = load_pipelines()

# Function part
# Function to generate content from an image
def generate_content(image):
    """Caption an image, write a story about it and narrate the story.

    Args:
        image: A path or file-like object accepted by ``PIL.Image.open``
            (the Streamlit upload is passed in by the UI).

    Returns:
        The ``(caption, story, audio_path)`` tuple produced by
        ``generate_story``; ``audio_path`` points at a temporary WAV file
        that the caller is responsible for deleting.

    Raises:
        ValueError: If the generated text is empty after cleaning.
    """
    pil_image = Image.open(image)

    # Generate caption
    caption = captioner(pil_image)[0]["generated_text"]
    st.write("**🌟 What's in the picture: 🌟**")
    st.write(caption)

    # Create prompt for story
    prompt = (
        f"Write a funny, interesting children's story centered on this scene: {caption}\n"
        f"Story in third-person narrative, describing this scene exactly: {caption} "
        f"Mention the exact place, location, or venue within {caption}. "
        f"Avoid numbers, random letter combinations, and single-letter words.")

    # Generate raw story; return_full_text=False keeps the prompt out of
    # the text that is shown and narrated.
    raw = storyer(
        prompt,
        max_new_tokens=100,
        temperature=0.6,
        top_p=0.85,
        no_repeat_ngram_size=0,
        return_full_text=False,
    )[0]["generated_text"].strip()

    # Cleaning, display and narration all live in generate_story.
    return generate_story(raw, caption, tts)


# Anything outside letters, digits, whitespace and . , ! ? " ' - is noise.
_DISALLOWED_CHARS = re.compile(r'[^A-Za-z0-9.,!?"\'\-\s]+')


def clean_generated_story(raw):
    """Strip noise characters and single-letter words from generated text.

    Disallowed characters are replaced by spaces, then every
    whitespace-separated token that holds fewer than two alphanumeric
    characters (single letters, stray punctuation) is dropped. Punctuation
    attached to a kept word is preserved so sentences stay readable and
    the narration keeps its pauses.

    Args:
        raw: Text returned by the story model.

    Returns:
        The cleaned text with tokens joined by single spaces; an empty
        string when nothing survives.
    """
    tokens = _DISALLOWED_CHARS.sub(" ", raw).split()
    return " ".join(
        token for token in tokens if sum(map(str.isalnum, token)) > 1
    )


def generate_story(raw, caption, tts, max_words=100):
    """Clean, display and narrate a generated story.

    Args:
        raw: Raw story text from the text-generation pipeline.
        caption: Image caption, passed through unchanged in the result.
        tts: Text-to-speech callable; each call must return a mapping with
            an ``"audio"`` array, and ``tts.model.config.sampling_rate``
            must give the sample rate of that audio.
        max_words: Maximum number of words kept in the story.

    Returns:
        ``(caption, story, temp_file_path)`` where ``temp_file_path`` is a
        WAV file created with ``delete=False``; the caller must remove it.

    Raises:
        ValueError: If no text is left after cleaning and trimming.
    """
    # Clean first, then trim, so the word limit counts surviving words only.
    story = " ".join(clean_generated_story(raw).split()[:max_words])
    if not story:
        # Without this guard np.concatenate([]) fails with an opaque message.
        raise ValueError("Generated story is empty after cleaning")

    # Display story in Streamlit
    st.write("**📖 Your funny story: 📖**")
    st.write(story)

    # Synthesize in short chunks and stitch the waveforms together.
    chunks = textwrap.wrap(story, width=200)
    audio = np.concatenate([tts(chunk)["audio"].squeeze() for chunk in chunks])

    # Reserve a temp file name, close the handle, then write the audio.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
        temp_file_path = temp_file.name
    try:
        sf.write(temp_file_path, audio, tts.model.config.sampling_rate)
    except Exception:
        # Do not leave an orphaned temp file behind when writing fails.
        os.remove(temp_file_path)
        raise

    return caption, story, temp_file_path

# Streamlit UI
st.title("😎Story Maker")
st.markdown("Upload a picture, I will generate a story for you")

uploaded_image = st.file_uploader("Choose your picture", type=["jpg", "jpeg", "png"])
has_upload = uploaded_image is not None

# Preview area: the user's picture once uploaded, a placeholder before that.
if has_upload:
    st.image(uploaded_image, caption="Your Picture ", use_container_width=True)
else:
    st.image("https://example.com/placeholder_image.jpg", caption="Upload your picture here!", use_container_width=True)

# The button is always rendered; a click without an upload only warns.
generate_clicked = st.button("Generate a story")
if generate_clicked and not has_upload:
    st.warning("Please upload a picture first! ")
elif generate_clicked:
    with st.spinner("Processing"):
        caption, story, audio_path = generate_content(uploaded_image)
        st.success(" Your story is ready!😊")
        st.audio(audio_path, format="audio/wav")
        # The WAV file is temporary; delete it after handing it to st.audio.
        os.remove(audio_path)