Spaces:

xujinheng666
/

Assignment

Sleeping

App Files Files Community

xujinheng666 committed on Mar 3

Commit

df8b867

verified ·

1 Parent(s): 0d8b59b

Update app.py

Browse files

Files changed (1) hide show

app.py +68 -40

app.py CHANGED Viewed

@@ -1,49 +1,77 @@
 import streamlit as st
 from transformers import pipeline
 # function part
-# img2text
-def img2text(url):
-    image_to_text_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
-    text = image_to_text_model(url)[0]["generated_text"]
     return text
-# text2story
-def text2story(text):
-    story_text = pipeline("text-generation", model="nomic-ai/gpt4all-j")
-    return story_text
-# text2audio
-def text2audio(story_text):
-    audio_data = pipeline("text-to-speech", model="facebook/mms-tts-yor")
-#main part
-st.set_page_config(page_title="Your Image to Audio Story",
-                   page_icon="🦜")
-st.header("Turn Your Image to Audio Story")
-uploaded_file = st.file_uploader("Select an Image...")
-if uploaded_file is not None:
-    print(uploaded_file)
-    bytes_data = uploaded_file.getvalue()
-    with open(uploaded_file.name, "wb") as file:
-        file.write(bytes_data)
-    st.image(uploaded_file, caption="Uploaded Image",
-             use_column_width=True)
-    #Stage 1: Image to Text
-    st.text('Processing img2text...')
-    scenario = img2text(uploaded_file.name)
-    st.write(scenario)
-    #Stage 2: Text to Story
-    st.text('Generating a story...')
-    story = text2story(scenario, max_length=150, num_return_sequences=1)
-    st.write(story)
-    #Stage 3: Story to Audio data
-    st.text('Generating audio data...')
-    audio_data =text2audio(story)

 import streamlit as st
 from transformers import pipeline
+import soundfile as sf
+import os
+# Cache models to avoid reloading on every interaction
+@st.cache_resource
+def load_models():
+    """Build the three Hugging Face pipelines the app uses.
+
+    Decorated with ``st.cache_resource`` so the pipelines are not reloaded
+    on every interaction.
+
+    Returns:
+        A dict with the keys ``"image_to_text"``, ``"story_gen"`` and
+        ``"text_to_speech"``, each mapped to its pipeline.
+    """
+    captioner = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
+    storyteller = pipeline("text-generation", model="nomic-ai/gpt4all-j")
+    # NOTE(review): "mms-tts-yor" looks like a Yoruba voice while the story
+    # prompt is written in English - confirm this is the intended model.
+    speaker = pipeline("text-to-speech", model="facebook/mms-tts-yor")
+    return {
+        "image_to_text": captioner,
+        "story_gen": storyteller,
+        "text_to_speech": speaker,
+    }
 # function part
+def img2text(url, processor):
+    """Return the caption produced by *processor* for the image at *url*.
+
+    Args:
+        url: Image location handed straight to the processor.
+        processor: Callable returning a list whose first item has a
+            ``"generated_text"`` key.
+    """
+    first_result = processor(url)[0]
+    return first_result["generated_text"]
+def text2story(text, generator, max_length=150):
+    """Generate a short children's story from a caption.
+
+    Args:
+        text: Caption (or other seed text) inserted into the story prompt.
+        generator: Callable text-generation pipeline. It is called with the
+            prompt plus sampling keyword arguments and must return a list
+            whose first item has a ``"generated_text"`` key.
+        max_length: Forwarded to the generator as ``max_length``.
+
+    Returns:
+        The generated story, truncated to at most 1000 characters.
+    """
+    prompt = f"Create a children's story based on: {text}"
+    outputs = generator(
+        prompt,
+        max_length=max_length,
+        temperature=0.7,
+        do_sample=True,
+        # By default the pipeline echoes the prompt at the start of
+        # "generated_text", so the instruction sentence would be displayed
+        # and read aloud as part of the story. Ask for the continuation only.
+        return_full_text=False,
+    )
+    story = outputs[0]["generated_text"]
+    return story[:1000]  # Safety truncation
+def text2audio(text, synthesizer):
+    """Run *synthesizer* on *text* and return its result unchanged.
+
+    The caller reads ``"audio"`` and ``"sampling_rate"`` from the result.
+    """
+    return synthesizer(text)
+# main part
+def main():
+    """Render the page and run the image -> caption -> story -> audio flow.
+
+    The uploaded image and the synthesized audio are written to uniquely
+    named temporary files, which are removed even if a stage raises.
+    """
+    # Local import: only this function needs it.
+    import tempfile
+
+    st.set_page_config(page_title="Your Image to Audio Story", page_icon="🦜")
+    st.header("Turn Your Image to Audio Story")
+    # Load models once
+    models = load_models()
+    uploaded_file = st.file_uploader("Select an Image...", type=["jpg", "jpeg", "png"])
+    if uploaded_file is None:
+        return
+
+    # Keep only the extension of the client-supplied name so the upload
+    # cannot influence where the temporary file is written.
+    suffix = os.path.splitext(uploaded_file.name)[1]
+    temp_path = None
+    audio_path = None
+    try:
+        # Save uploaded file temporarily
+        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as f:
+            # Record the path first so cleanup still runs if the write fails.
+            temp_path = f.name
+            f.write(uploaded_file.getvalue())
+        st.image(uploaded_file, caption="Uploaded Image", use_column_width=True)
+        # Stage 1: Image to Text
+        with st.spinner('Generating caption...'):
+            scenario = img2text(temp_path, models["image_to_text"])
+            st.subheader("Image Caption")
+            st.write(scenario)
+        # Stage 2: Text to Story
+        with st.spinner('Creating story...'):
+            story = text2story(scenario, models["story_gen"])
+            st.subheader("Generated Story")
+            st.write(story)
+        # Stage 3: Story to Audio
+        with st.spinner('Generating audio...'):
+            audio = text2audio(story, models["text_to_speech"])
+            samples = audio["audio"]
+            # NOTE(review): soundfile expects (frames, channels); the TTS
+            # pipeline may return a channels-first array such as (1, N) -
+            # confirm against the installed transformers version. Transpose
+            # only when the array is clearly channels-first.
+            if getattr(samples, "ndim", 1) == 2 and samples.shape[0] < samples.shape[1]:
+                samples = samples.T
+            # A unique name avoids collisions between concurrent sessions.
+            fd, audio_path = tempfile.mkstemp(suffix=".wav")
+            os.close(fd)
+            sf.write(audio_path, samples, samplerate=audio["sampling_rate"])
+            st.subheader("Audio Story")
+            st.audio(audio_path)
+    finally:
+        # Clean up temp files
+        for path in (temp_path, audio_path):
+            if path is not None and os.path.exists(path):
+                os.remove(path)
+# Run the app only when this file is executed as a script, not when imported.
+if __name__ == "__main__":
+    main()