xujinheng666 committed on
Commit
0815464
·
verified ·
1 Parent(s): 109ddb8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -68
app.py CHANGED
@@ -1,77 +1,52 @@
1
  import streamlit as st
2
  from transformers import pipeline
3
- import soundfile as sf
4
- import os
5
-
6
- # Cache models to avoid reloading on every interaction
7
- @st.cache_resource
8
- def load_models():
9
- return {
10
- "image_to_text": pipeline("image-to-text", model="Salesforce/blip-image-captioning-base"),
11
- "story_gen": pipeline("text-generation", model="distilbert/distilgpt2"),
12
- "text_to_speech": pipeline("text-to-speech", model="facebook/mms-tts-eng")
13
- }
14
 
15
  # function part
16
- def img2text(url, processor):
17
- text = processor(url)[0]["generated_text"]
 
 
18
  return text
19
 
20
- def text2story(text, generator, max_length=150):
21
- prompt = f"Create a story based on: {text}"
22
- story = generator(
23
- prompt,
24
- max_length=max_length,
25
- temperature=0.7,
26
- do_sample=True
27
- )[0]['generated_text']
28
- return story[:1000] # Safety truncation
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
- def text2audio(text, synthesizer):
31
- audio = synthesizer(text)
32
- return audio
 
33
 
34
- # main part
35
- def main():
36
- st.set_page_config(page_title="Your Image to Audio Story", page_icon="🦜")
37
- st.header("Turn Your Image to Audio Story")
38
-
39
- # Load models once
40
- models = load_models()
41
-
42
- uploaded_file = st.file_uploader("Select an Image...", type=["jpg", "jpeg", "png"])
43
-
44
- if uploaded_file is not None:
45
- # Save uploaded file temporarily
46
- temp_path = f"temp_{uploaded_file.name}"
47
- with open(temp_path, "wb") as f:
48
- f.write(uploaded_file.getvalue())
49
-
50
- st.image(uploaded_file, caption="Uploaded Image", use_column_width=True)
51
-
52
- # Stage 1: Image to Text
53
- with st.spinner('Generating caption...'):
54
- scenario = img2text(temp_path, models["image_to_text"])
55
- st.subheader("Image Caption")
56
- st.write(scenario)
57
-
58
- # Stage 2: Text to Story
59
- with st.spinner('Creating story...'):
60
- story = text2story(scenario, models["story_gen"])
61
- st.subheader("Generated Story")
62
- st.write(story)
63
-
64
- # Stage 3: Story to Audio
65
- with st.spinner('Generating audio...'):
66
- audio = text2audio(story, models["text_to_speech"])
67
- sf.write("temp_audio.wav", audio["audio"], samplerate=audio["sampling_rate"])
68
-
69
- st.subheader("Audio Story")
70
- st.audio("temp_audio.wav")
71
-
72
- # Clean up temp files
73
- os.remove(temp_path)
74
- os.remove("temp_audio.wav")
75
 
76
- if __name__ == "__main__":
77
- main()
 
 
1
  import streamlit as st
2
  from transformers import pipeline
 
 
 
 
 
 
 
 
 
 
 
3
 
4
  # function part
5
# img2text
@st.cache_resource
def _load_image_to_text_model():
    """Build the BLIP image-captioning pipeline once and reuse it."""
    return pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")


def img2text(url):
    """Return a caption for the given image.

    Args:
        url: Image input handed straight to the image-to-text pipeline
            (the app passes a local file path).

    Returns:
        The ``generated_text`` field of the first result the pipeline
        produces.
    """
    # Streamlit re-executes the whole script on every interaction; going
    # through the cached loader avoids re-creating the model each time.
    image_to_text_model = _load_image_to_text_model()
    text = image_to_text_model(url)[0]["generated_text"]
    return text
10
 
11
# text2story
@st.cache_resource
def _load_story_generator():
    """Build the distilgpt2 text-generation pipeline once and reuse it."""
    return pipeline("text-generation", model="distilgpt2")


def text2story(text):
    """Generate a short story that continues from ``text``.

    Args:
        text: Prompt string passed to the text-generation pipeline
            (the app passes the image caption).

    Returns:
        The ``generated_text`` field of the single sequence requested
        from the pipeline.
    """
    # Reuse the cached pipeline instead of constructing a new one per
    # call; the script is re-run on every Streamlit interaction.
    story_generator = _load_story_generator()
    story_text = story_generator(text, max_length=150, num_return_sequences=1)
    return story_text[0]["generated_text"]
16
+
17
# text2audio
@st.cache_resource
def _load_tts_model():
    """Build the MMS English text-to-speech pipeline once and reuse it."""
    return pipeline("text-to-speech", model="facebook/mms-tts-eng")


def text2audio(story_text):
    """Synthesize speech for ``story_text``.

    Args:
        story_text: Text passed to the text-to-speech pipeline.

    Returns:
        Whatever the text-to-speech pipeline returns for the input,
        unmodified.
    """
    # Reuse the cached pipeline instead of constructing a new one per
    # call; the script is re-run on every Streamlit interaction.
    tts_model = _load_tts_model()
    audio_data = tts_model(story_text)
    return audio_data
22
+
23
+
24
#main part
# Script body: upload an image, caption it, expand the caption into a
# story, and synthesize the story as audio.
import os
import tempfile

st.set_page_config(page_title="Your Image to Audio Story",
                   page_icon="🦜")
st.header("Turn Your Image to Audio Story")
uploaded_file = st.file_uploader("Select an Image...")

if uploaded_file is not None:
    # img2text is given a file path, so the upload is written to disk.
    # A temporary file is used rather than the client-supplied name so an
    # upload can never overwrite a file in the working directory (for
    # example one named "app.py"); only the extension is carried over.
    suffix = os.path.splitext(uploaded_file.name)[1]
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as file:
        file.write(uploaded_file.getvalue())
        image_path = file.name

    try:
        st.image(uploaded_file, caption="Uploaded Image",
                 use_column_width=True)

        #Stage 1: Image to Text
        st.text('Processing img2text...')
        scenario = img2text(image_path)
        st.write(scenario)

        #Stage 2: Text to Story
        st.text('Generating a story...')
        story = text2story(scenario)
        st.write(story)

        #Stage 3: Story to Audio data
        st.text('Generating audio data...')
        audio_data = text2audio(story)
        # Play the synthesized result; previously it was computed and then
        # discarded. Assumes the pipeline output is a dict with "audio"
        # (sample array) and "sampling_rate" keys.
        st.audio(audio_data["audio"], sample_rate=audio_data["sampling_rate"])
    finally:
        # Always remove the temporary image, even if a stage fails.
        os.remove(image_path)