xujinheng666 committed on
Commit
df8b867
·
verified ·
1 Parent(s): 0d8b59b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -40
app.py CHANGED
@@ -1,49 +1,77 @@
1
  import streamlit as st
2
  from transformers import pipeline
 
 
 
 
 
 
 
 
 
 
 
3
 
4
  # function part
5
- # img2text
6
def img2text(url):
    """Caption the image at *url* and return the generated caption text.

    A fresh "image-to-text" pipeline (BLIP base captioning checkpoint) is
    built on every call.
    """
    captioner = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
    results = captioner(url)
    return results[0]["generated_text"]
10
 
11
- # text2story
12
def text2story(text, max_length=150, num_return_sequences=1):
    """Generate a story from *text* and return it as a string.

    Args:
        text: Prompt passed to the text-generation pipeline.
        max_length: Forwarded to the pipeline call as ``max_length``.
        num_return_sequences: Forwarded to the pipeline call; only the first
            generated sequence is returned.

    Returns:
        The ``generated_text`` of the first sequence produced by the pipeline.
    """
    generator = pipeline("text-generation", model="nomic-ai/gpt4all-j")
    # Actually run the generator: previously the pipeline object itself was
    # returned, so callers never received any story text.
    outputs = generator(
        text,
        max_length=max_length,
        num_return_sequences=num_return_sequences,
    )
    return outputs[0]["generated_text"]
15
-
16
- # text2audio
17
def text2audio(story_text):
    """Synthesize speech for *story_text* and return the pipeline output.

    Returns:
        Whatever the "text-to-speech" pipeline returns for *story_text*.
        NOTE(review): expected to be a dict with "audio" and "sampling_rate"
        keys — confirm against the installed transformers version.
    """
    synthesizer = pipeline("text-to-speech", model="facebook/mms-tts-yor")
    # The pipeline was previously built but never invoked, so the function
    # always returned None.
    return synthesizer(story_text)
19
-
20
- #main part
21
-
22
st.set_page_config(page_title="Your Image to Audio Story",
                   page_icon="🦜")
st.header("Turn Your Image to Audio Story")
uploaded_file = st.file_uploader("Select an Image...")

if uploaded_file is not None:
    # Persist the upload to disk because img2text() is handed a file path.
    bytes_data = uploaded_file.getvalue()
    with open(uploaded_file.name, "wb") as file:
        file.write(bytes_data)

    st.image(uploaded_file, caption="Uploaded Image",
             use_column_width=True)

    # Stage 1: Image to Text
    st.text('Processing img2text...')
    scenario = img2text(uploaded_file.name)
    st.write(scenario)

    # Stage 2: Text to Story
    # Call with the prompt only: text2story() as originally declared takes a
    # single argument, so passing generation keywords raised TypeError.
    st.text('Generating a story...')
    story = text2story(scenario)
    st.write(story)

    # Stage 3: Story to Audio data
    st.text('Generating audio data...')
    audio_data = text2audio(story)
    # Play the result instead of discarding it; guard against a None return.
    if audio_data is not None:
        st.audio(audio_data["audio"], sample_rate=audio_data["sampling_rate"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
 
 
 
1
  import streamlit as st
2
  from transformers import pipeline
3
+ import soundfile as sf
4
+ import os
5
+
6
+ # Cache models to avoid reloading on every interaction
7
@st.cache_resource
def load_models():
    """Build the three pipelines the app needs and return them in a dict.

    Wrapped in ``st.cache_resource`` so the models are not reloaded on every
    interaction.

    Returns:
        dict with the keys "image_to_text", "story_gen" and "text_to_speech",
        each mapped to the corresponding pipeline object.
    """
    captioner = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
    storyteller = pipeline("text-generation", model="nomic-ai/gpt4all-j")
    # NOTE(review): "mms-tts-yor" looks like a Yoruba voice while the story
    # prompt is English — confirm this is the intended checkpoint.
    speaker = pipeline("text-to-speech", model="facebook/mms-tts-yor")
    return {
        "image_to_text": captioner,
        "story_gen": storyteller,
        "text_to_speech": speaker,
    }
14
 
15
  # function part
16
def img2text(url, processor):
    """Return the caption that *processor* generates for the image at *url*.

    Args:
        url: Image location handed straight to *processor*.
        processor: Callable returning a sequence whose first item is a mapping
            with a "generated_text" entry.
    """
    first_result = processor(url)[0]
    return first_result["generated_text"]
19
 
20
def text2story(text, generator, max_length=150):
    """Generate a short children's story about *text*.

    Args:
        text: Scene description (e.g. an image caption) to base the story on.
        generator: Text-generation callable; invoked with the prompt plus
            ``max_length``, ``temperature`` and ``do_sample`` keywords and
            expected to return a sequence whose first item has a
            "generated_text" entry.
        max_length: Forwarded to *generator* as ``max_length``.

    Returns:
        The generated story with the instruction prompt removed, surrounding
        whitespace stripped, and capped at 1000 characters.
    """
    prompt = f"Create a children's story based on: {text}"
    story = generator(
        prompt,
        max_length=max_length,
        temperature=0.7,
        do_sample=True,
    )[0]["generated_text"]

    # Text-generation pipelines echo the prompt at the start of the output by
    # default; drop it so the instruction is not shown (or read aloud) as part
    # of the story.
    if story.startswith(prompt):
        story = story[len(prompt):]

    return story.strip()[:1000]  # Safety truncation
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
def text2audio(text, synthesizer):
    """Run *synthesizer* on *text* and return its result unchanged."""
    return synthesizer(text)
 
33
 
34
+ # main part
35
def main():
    """Streamlit entry point: caption an uploaded image, write a story, speak it.

    The uploaded image and the synthesized audio are written to uniquely named
    temporary files, which are removed even when a stage raises.
    """
    import tempfile  # local import: only this function needs it

    st.set_page_config(page_title="Your Image to Audio Story", page_icon="🦜")
    st.header("Turn Your Image to Audio Story")

    # Load models once
    models = load_models()

    uploaded_file = st.file_uploader("Select an Image...", type=["jpg", "jpeg", "png"])
    if uploaded_file is None:
        return

    st.image(uploaded_file, caption="Uploaded Image", use_column_width=True)

    # Keep only the extension of the client-supplied name; the rest of the
    # path is generated so concurrent sessions cannot overwrite each other.
    suffix = os.path.splitext(uploaded_file.name)[1]
    temp_paths = []
    try:
        # Save uploaded file temporarily (img2text is given a file path).
        with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as image_file:
            temp_paths.append(image_file.name)
            image_file.write(uploaded_file.getvalue())

        # Stage 1: Image to Text
        with st.spinner('Generating caption...'):
            scenario = img2text(image_file.name, models["image_to_text"])
        st.subheader("Image Caption")
        st.write(scenario)

        # Stage 2: Text to Story
        with st.spinner('Creating story...'):
            story = text2story(scenario, models["story_gen"])
        st.subheader("Generated Story")
        st.write(story)

        # Stage 3: Story to Audio
        with st.spinner('Generating audio...'):
            audio = text2audio(story, models["text_to_speech"])
            # Reserve a unique path, then let soundfile write the WAV data.
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as audio_file:
                temp_paths.append(audio_file.name)
            sf.write(audio_file.name, audio["audio"], samplerate=audio["sampling_rate"])

        st.subheader("Audio Story")
        st.audio(audio_file.name)
    finally:
        # Clean up temp files on success and on failure alike.
        for path in temp_paths:
            try:
                os.remove(path)
            except FileNotFoundError:
                pass  # already gone; nothing left to clean up


if __name__ == "__main__":
    main()