adi-123 committed on
Commit
69fb08d
·
verified ·
1 Parent(s): f366ef4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +91 -59
app.py CHANGED
@@ -10,92 +10,124 @@ from email.mime.multipart import MIMEMultipart
10
  from email.mime.text import MIMEText
11
  from email.mime.audio import MIMEAudio
12
 
 
13
  def img2txt(url: str) -> str:
14
  captioning_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
15
  text = captioning_model(url, max_new_tokens=20)[0]["generated_text"]
16
  return text
17
 
 
18
  def txt2story(prompt: str, top_k: int, top_p: float, temperature: float) -> str:
19
  client = Together(api_key=os.environ.get("TOGETHER_API_KEY"))
 
20
  story_prompt = f"Write a short story of no more than 250 words based on the following prompt: {prompt}"
 
21
  stream = client.chat.completions.create(
22
  model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
23
- messages=[{"role": "user", "content": story_prompt}],
 
 
 
 
24
  top_k=top_k,
25
  top_p=top_p,
26
  temperature=temperature,
27
  stream=True
28
  )
29
- story = ''.join(chunk.choices[0].delta.content for chunk in stream)
 
 
 
 
30
  return story
31
 
 
32
  def txt2speech(text: str) -> None:
33
  tts = gTTS(text=text, lang='en')
34
  tts.save("audio_story.mp3")
35
 
36
def send_story_email(recipient_email: str, story_text: str, audio_file_path: str) -> bool:
    """Email a generated story with its audio file attached.

    SMTP settings come from the environment: ``SMTP_SERVER``, ``SMTP_PORT``
    (defaults to 587), ``SENDER_EMAIL`` and ``SENDER_PASSWORD``.

    Args:
        recipient_email: Address placed in the ``To`` header.
        story_text: Story text inserted into the plain-text body.
        audio_file_path: Path of the audio file to attach.

    Returns:
        True when the message was handed to the SMTP server; False on any
        failure (missing settings, unreadable audio file, SMTP error).
    """
    import ssl  # Local import: only this function needs it.

    try:
        smtp_server = os.environ.get("SMTP_SERVER")
        smtp_port = int(os.environ.get("SMTP_PORT", 587))
        sender_email = os.environ.get("SENDER_EMAIL")
        sender_password = os.environ.get("SENDER_PASSWORD")

        # Fail early with a clear message instead of an obscure SMTP error.
        if not (smtp_server and sender_email and sender_password):
            print("Error sending email: SMTP settings are not configured")
            return False

        msg = MIMEMultipart()
        msg['From'] = sender_email
        msg['To'] = recipient_email
        msg['Subject'] = "Your Generated Story"

        msg.attach(MIMEText(f"Here's your generated story:\n\n{story_text}\n\nEnjoy!", 'plain'))

        with open(audio_file_path, 'rb') as audio_file:
            # audio/mpeg is the registered MIME type for MP3 data.
            audio_part = MIMEAudio(audio_file.read(), _subtype='mpeg')
        audio_part.add_header(
            'Content-Disposition',
            'attachment',
            filename=os.path.basename(audio_file_path),
        )
        msg.attach(audio_part)

        with smtplib.SMTP(smtp_server, smtp_port) as server:
            # Verify the server certificate before the password is sent.
            server.starttls(context=ssl.create_default_context())
            server.login(sender_email, sender_password)
            server.send_message(msg)

        return True

    except Exception as e:
        # Deliberate best-effort boundary: report the problem, signal failure.
        print(f"Error sending email: {e}")
        return False
65
-
66
def validate_email(email: str) -> bool:
    """Return True when ``email`` has the shape ``local@domain.tld``.

    This is a light syntactic check, not full address validation. The whole
    string must match, so an address followed by a newline is rejected
    (``re.match`` with a ``$`` anchor would accept it).

    Args:
        email: Candidate address.

    Returns:
        True if the entire string matches the pattern, otherwise False.
    """
    pattern = r'[\w\.-]+@[\w\.-]+\.\w+'
    return re.fullmatch(pattern, email) is not None
69
-
70
  def main():
71
- st.set_page_config(page_title="🎨 Image-to-Audio Story 🎧", layout="wide")
72
-
 
 
 
73
  st.title("Turn the Image into Audio Story")
74
 
75
- uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
 
 
 
 
 
 
 
 
 
76
 
77
- if uploaded_file is not None:
78
- st.image(uploaded_file)
79
-
80
- if st.button("🎨 Generate Story"):
81
- scenario = img2txt(uploaded_file) # Process the uploaded image
82
- prompt = f"Create a story based on: {scenario}"
83
- story = txt2story(prompt, top_k=5, top_p=0.8, temperature=1.5)
84
- txt2speech(story)
85
 
86
- st.session_state.story = story
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
 
88
- st.audio("audio_story.mp3") # Use Streamlit's audio player
 
 
89
 
90
- email = st.text_input("Enter your email address:")
91
- if st.button("πŸ“€ Send to Email"):
92
- if validate_email(email):
93
- if send_story_email(email, story, "audio_story.mp3"):
94
- st.success(f"Email sent to: {email}")
95
- else:
96
- st.error("❌ Failed to send email.")
97
- else:
98
- st.error("Please enter a valid email address.")
99
 
100
  if __name__ == '__main__':
101
- main()
 
10
  from email.mime.text import MIMEText
11
  from email.mime.audio import MIMEAudio
12
 
13
+ # Image-to-text function
14
  def img2txt(url: str) -> str:
15
  captioning_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
16
  text = captioning_model(url, max_new_tokens=20)[0]["generated_text"]
17
  return text
18
 
19
+ # Text-to-story generation function
20
  def txt2story(prompt: str, top_k: int, top_p: float, temperature: float) -> str:
21
  client = Together(api_key=os.environ.get("TOGETHER_API_KEY"))
22
+
23
  story_prompt = f"Write a short story of no more than 250 words based on the following prompt: {prompt}"
24
+
25
  stream = client.chat.completions.create(
26
  model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
27
+ messages=[
28
+ {"role": "system", "content": '''As an experienced short story writer, write a meaningful story influenced by the provided prompt.
29
+ Ensure the story does not exceed 250 words.'''},
30
+ {"role": "user", "content": story_prompt}
31
+ ],
32
  top_k=top_k,
33
  top_p=top_p,
34
  temperature=temperature,
35
  stream=True
36
  )
37
+
38
+ story = ''
39
+ for chunk in stream:
40
+ story += chunk.choices[0].delta.content
41
+
42
  return story
43
 
44
+ # Text-to-speech function
45
  def txt2speech(text: str) -> None:
46
  tts = gTTS(text=text, lang='en')
47
  tts.save("audio_story.mp3")
48
 
49
+ # Main Streamlit application
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  def main():
51
+ st.set_page_config(
52
+ page_title="🎨 Image-to-Audio Story 🎧",
53
+ page_icon="πŸ–ΌοΈ",
54
+ layout="wide"
55
+ )
56
  st.title("Turn the Image into Audio Story")
57
 
58
+ # Initialize session state variables
59
+ if "story" not in st.session_state:
60
+ st.session_state.story = ""
61
+ if "audio_file_path" not in st.session_state:
62
+ st.session_state.audio_file_path = ""
63
+ if "caption" not in st.session_state:
64
+ st.session_state.caption = ""
65
+
66
+ # Main content area
67
+ col1, col2 = st.columns([2, 3])
68
 
69
+ with col1:
70
+ # Image upload section
71
+ st.markdown("## πŸ“· Upload Image")
72
+ uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
 
 
 
 
73
 
74
+ # Story preferences section
75
+ st.markdown("## 🎭 Story Preferences")
76
+ preferences = get_user_preferences()
77
+
78
+ with col2:
79
+ if uploaded_file is not None:
80
+ # Display uploaded image
81
+ st.markdown("## πŸ–ΌοΈ Your Image")
82
+ bytes_data = uploaded_file.read()
83
+ with open("uploaded_image.jpg", "wb") as file:
84
+ file.write(bytes_data)
85
+ st.image(uploaded_file, use_column_width=True)
86
+
87
+ # Process image and generate story
88
+ if st.button("🎨 Generate Story"):
89
+ try:
90
+ # Step 1: Generate Image Caption
91
+ with st.spinner("πŸ” Generating Image Caption..."):
92
+ scenario = img2txt("uploaded_image.jpg")
93
+ st.session_state.caption = scenario # Store caption in session state
94
+
95
+ # Step 2: Generate Story
96
+ with st.spinner("πŸ“ Generating Story..."):
97
+ prompt = f"""Based on the image description: '{scenario}',
98
+ create a {preferences['genre']} story set in {preferences['setting']}
99
+ in {preferences['continent']}. The story should have a {preferences['tone']}
100
+ tone and explore the theme of {preferences['theme']}. The main conflict
101
+ should be {preferences['conflict']}. The story should have a {preferences['twist']}
102
+ and end with a {preferences['ending']} ending."""
103
+
104
+ story = txt2story(prompt, top_k=5, top_p=0.8, temperature=1.5)
105
+ st.session_state.story = story # Store story in session state
106
+
107
+ # Step 3: Generate Audio Version
108
+ with st.spinner("🎧 Generating Audio Version..."):
109
+ txt2speech(story)
110
+ st.session_state.audio_file_path = "audio_story.mp3" # Store audio path in session state
111
+
112
+ except Exception as e:
113
+ st.error(f"An error occurred: {str(e)}")
114
+ st.warning("Please try again or contact support if the problem persists.")
115
+
116
+ # Display results if story exists in session state
117
+ if st.session_state.story:
118
+ st.markdown("---")
119
+
120
+ # Image caption
121
+ with st.expander("πŸ“œ Image Caption", expanded=True):
122
+ st.write(st.session_state.caption)
123
 
124
+ # Story text
125
+ with st.expander("πŸ“– Generated Story", expanded=True):
126
+ st.write(st.session_state.story)
127
 
128
+ # Audio player
129
+ with st.expander("🎧 Audio Version", expanded=True):
130
+ st.audio(st.session_state.audio_file_path)
 
 
 
 
 
 
131
 
132
  if __name__ == '__main__':
133
+ main()