Spaces:

adi-123
/

Image-to-Audio_Story_Generator

Running

App Files Files Community

Image-to-Audio_Story_Generator / utils.py

adi-123

Update utils.py

ce36df3 verified 9 months ago

raw

history blame

3.99 kB

	import re
	import os
	import streamlit as st
	from transformers import pipeline
	from typing import Dict, Optional
	from together import Together
	from gtts import gTTS
	from mail_sender import MailSender


	# Image-to-text
	# Lazily created captioning pipeline, shared by every img2txt call so the
	# model is not rebuilt for each image.
	_captioning_model = None


	def _get_captioning_model():
	    """Return the shared captioning pipeline, building it on first use."""
	    global _captioning_model
	    if _captioning_model is None:
	        print("Initializing captioning model...")
	        _captioning_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
	    return _captioning_model


	def img2txt(url: str) -> str:
	    """
	    Generate a short caption for an image.

	    Args:
	        url: Image reference handed unchanged to the "image-to-text"
	            pipeline (presumably a URL or local path; confirm with callers).

	    Returns:
	        The "generated_text" field of the first result the pipeline returns,
	        generated with max_new_tokens=20.
	    """
	    captioning_model = _get_captioning_model()

	    print("Generating text from the image...")
	    text = captioning_model(url, max_new_tokens=20)[0]["generated_text"]

	    print(text)
	    return text

	# Text-to-story generation with LLM model
	def txt2story(prompt: str, top_k: int, top_p: float, temperature: float) -> str:
	    """
	    Generate a short story (requested limit: 250 words) from a text prompt.

	    Args:
	        prompt: Text the story should be based on.
	        top_k: Sampling parameter forwarded to the chat completion call.
	        top_p: Sampling parameter forwarded to the chat completion call.
	        temperature: Sampling parameter forwarded to the chat completion call.

	    Returns:
	        The streamed completion text concatenated into a single string.
	        The 250-word limit is only requested in the prompts; it is not
	        enforced on the returned text.
	    """
	    # The key is read from the environment; if TOGETHER_API_KEY is unset,
	    # None is passed to the client.
	    client = Together(api_key=os.environ.get("TOGETHER_API_KEY"))

	    # Wrap the caller's prompt with the 250-word instruction
	    story_prompt = f"Write a short story of no more than 250 words based on the following prompt: {prompt}"

	    # NOTE: the system prompt is a multi-line literal; the leading whitespace
	    # of its second line is part of the text sent to the model.
	    stream = client.chat.completions.create(
	        model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
	        messages=[
	            {"role": "system", "content": '''As an experienced short story writer, write a meaningful story influenced by the provided prompt.
	Ensure the story does not exceed 250 words.'''},
	            {"role": "user", "content": story_prompt}
	        ],
	        top_k=top_k,
	        top_p=top_p,
	        temperature=temperature,
	        stream=True
	    )

	    # Collect the text pieces. Streamed chunks may arrive with an empty
	    # `choices` list or with `delta.content` set to None (e.g. role-only or
	    # final chunks); concatenating those would raise, so they are skipped.
	    pieces = []
	    for chunk in stream:
	        if not chunk.choices:
	            continue
	        piece = getattr(chunk.choices[0].delta, "content", None)
	        if piece:
	            pieces.append(piece)

	    return "".join(pieces)

	# Text-to-speech
	def txt2speech(text: str, output_path: str = "audio_story.mp3", lang: str = "en") -> None:
	    """
	    Convert text to speech with gTTS and save it as an audio file.

	    Args:
	        text: Text to be spoken.
	        output_path: Where the audio file is written. Defaults to
	            "audio_story.mp3", the path that was previously hard-coded, so
	            existing callers are unaffected. Pass a distinct path per
	            request to avoid several sessions overwriting the same file.
	        lang: Language code handed to gTTS. Defaults to "en".

	    Returns:
	        None. The result is the file written at ``output_path``.
	    """
	    print("Converting text to speech using gTTS...")

	    # Generate audio from the text
	    tts = gTTS(text=text, lang=lang)

	    # Save the audio output to a file
	    tts.save(output_path)

	# Get user preferences for the story
	def get_user_preferences() -> Dict[str, str]:
	    """
	    Render one Streamlit select box per story attribute and collect the picks.

	    Returns:
	        A dict mapping each attribute key ('continent', 'genre', ...) to the
	        value returned by its select box, in the order the boxes are created.
	    """
	    # (result key, widget label, selectable options) - one entry per select box
	    fields = (
	        ('continent', "Continent", ["North America", "Europe", "Asia", "Africa", "Australia"]),
	        ('genre', "Genre", ["Science Fiction", "Fantasy", "Mystery", "Romance"]),
	        ('setting', "Setting", ["Future", "Medieval times", "Modern day", "Alternate reality"]),
	        ('plot', "Plot", ["Hero's journey", "Solving a mystery", "Love story", "Survival"]),
	        ('tone', "Tone", ["Serious", "Light-hearted", "Humorous", "Dark"]),
	        ('theme', "Theme", ["Self-discovery", "Redemption", "Love", "Justice"]),
	        ('conflict', "Conflict Type", ["Person vs. Society", "Internal struggle", "Person vs. Nature", "Person vs. Person"]),
	        ('twist', "Mystery/Twist", ["Plot twist", "Hidden identity", "Unexpected ally/enemy", "Time paradox"]),
	        ('ending', "Ending", ["Happy", "Bittersweet", "Open-ended", "Tragic"]),
	    )
	    return {key: st.selectbox(label, options) for key, label, options in fields}


	def send_story_email(recipient_email: str, story_text: str, audio_file_path: str) -> bool:
	    """
	    Email the story text, with the audio file attached, to one recipient.

	    Args:
	        recipient_email: Address the message is sent to.
	        story_text: Story embedded in the message body.
	        audio_file_path: Path of the audio file to attach.

	    Returns:
	        True when sending completes without raising; False when any
	        exception occurs (the error is printed, not re-raised).
	    """
	    try:
	        # MailSender is built without arguments; per the original comment it
	        # takes its configuration from environment variables.
	        MailSender().send_mail(
	            receiver_address=recipient_email,
	            subject="Your Generated Story",
	            email_content=f"Here's your generated story:\n\n{story_text}\n\nEnjoy!",
	            attached_files=[audio_file_path],
	        )
	    except Exception as exc:
	        # Best-effort delivery: report the failure and signal it to the caller
	        print(f"Error sending email: {str(exc)}")
	        return False
	    return True

	def validate_email(email: str) -> bool:
	    """
	    Basic email validation.

	    Checks that the whole string has the shape ``local@domain.tld``, where
	    the local and domain parts consist of word characters, dots and hyphens.
	    This is a shape check only; it does not verify that the address exists.

	    Args:
	        email: Candidate address.

	    Returns:
	        True if the entire string matches the pattern, otherwise False.
	        Non-string input (e.g. None) returns False instead of raising.
	    """
	    if not isinstance(email, str):
	        return False

	    # fullmatch instead of match + '$': '$' also matches just before a
	    # trailing newline, which would let "user@example.com\n" pass.
	    pattern = r'[\w\.-]+@[\w\.-]+\.\w+'
	    return re.fullmatch(pattern, email) is not None