Spaces:

adi-123
/

Image-to-Audio_Story_Generator

Running

App Files Files

Image-to-Audio_Story_Generator / app.py

adi-123's picture

Update app.py

2b7f7bd verified 12 months ago

2.98 kB

	import os
	import tempfile
	from typing import Dict

	import requests
	import streamlit as st
	from together import Together
	from transformers import pipeline

	from utils import get_user_preferences, img2txt, txt2speech, txt2story


	def _build_story_prompt(scenario, preferences) -> str:
	    """Compose the story-generation prompt from the caption and user preferences.

	    Args:
	        scenario: Image description returned by ``img2txt``.
	        preferences: Mapping returned by ``get_user_preferences``; the keys
	            ``genre``, ``setting``, ``continent``, ``tone``, ``theme``,
	            ``conflict``, ``twist`` and ``ending`` are read here.

	    Returns:
	        The full prompt text passed to ``txt2story``.

	    Raises:
	        KeyError: If any of the keys listed above is missing.
	    """
	    return (
	        f"Based on the image description: '{scenario}', create a {preferences['genre']} story set in {preferences['setting']} in {preferences['continent']}. "
	        f"The story should have a {preferences['tone']} tone and explore the theme of {preferences['theme']}. "
	        f"The main conflict should be {preferences['conflict']}. "
	        f"The story should have a {preferences['twist']} and end with a {preferences['ending']} ending."
	    )


	def _save_upload_to_temp(uploaded_file) -> str:
	    """Write the uploaded image to a uniquely named temp file and return its path.

	    A per-call temporary file replaces the former fixed ``uploaded_image.jpg``
	    so that simultaneous sessions cannot overwrite each other's image, and the
	    file keeps the extension of the upload instead of always being ``.jpg``.
	    """
	    # Fall back to ".jpg" (the previous hard-coded extension) if the upload
	    # name carries no extension.
	    suffix = os.path.splitext(uploaded_file.name)[1] or ".jpg"
	    # delete=False: the file must outlive this handle so it can be reopened
	    # by path; the caller removes it once it is no longer needed.
	    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
	        # getvalue() returns the whole buffer regardless of stream position.
	        tmp.write(uploaded_file.getvalue())
	    return tmp.name


	# Main function
	def main():
	    """Render the page and run the image -> caption -> story -> audio pipeline.

	    Builds the uploader, the sidebar LLM parameters and the story-preference
	    inputs. Once an image is uploaded it is captioned with ``img2txt``, a
	    prompt is built from the caption and the preferences, ``txt2story``
	    generates the story, ``txt2speech`` converts it to audio, and the
	    caption, story and audio player are displayed.
	    """
	    st.set_page_config(page_title="🎨 Image-to-Audio Story 🎧", page_icon="🖼️")
	    st.title("Turn the Image into Audio Story")

	    # Allows users to upload an image file
	    uploaded_file = st.file_uploader("# 📷 Upload an image...", type=["jpg", "jpeg", "png"])

	    # Parameters for LLM model (in the sidebar)
	    st.sidebar.markdown("# LLM Inference Configuration Parameters")
	    top_k = st.sidebar.number_input("Top-K", min_value=1, max_value=100, value=5)
	    top_p = st.sidebar.number_input("Top-P", min_value=0.0, max_value=1.0, value=0.8)
	    temperature = st.sidebar.number_input("Temperature", min_value=0.1, max_value=2.0, value=1.5)

	    # Get user preferences for the story
	    st.markdown("## Story Preferences")
	    preferences = get_user_preferences()

	    # Nothing more to do until an image has been provided.
	    if uploaded_file is None:
	        return

	    # NOTE(review): recent Streamlit releases deprecate use_column_width in
	    # favour of use_container_width - confirm against the pinned version.
	    st.image(uploaded_file, caption='🖼️ Uploaded Image', use_column_width=True)

	    # Initiates AI processing and story generation
	    with st.spinner("## 🤖 AI is at Work! "):
	        image_path = _save_upload_to_temp(uploaded_file)
	        try:
	            scenario = img2txt(image_path)  # Extracts text from the image
	        finally:
	            # Best-effort cleanup: a leftover temp file must not abort the run.
	            try:
	                os.remove(image_path)
	            except OSError:
	                pass

	        # Prompt combines the image caption with the user's preferences
	        prompt = _build_story_prompt(scenario, preferences)

	        # Generates a story from the prompt and the sidebar LLM parameters
	        story = txt2story(prompt, top_k, top_p, temperature)

	        txt2speech(story)  # Converts the story to audio

	    st.markdown("---")
	    st.markdown("## 📜 Image Caption")
	    st.write(scenario)

	    st.markdown("---")
	    st.markdown("## 📖 Story")
	    st.write(story)

	    st.markdown("---")
	    st.markdown("## 🎧 Audio Story")
	    # NOTE(review): presumably txt2speech writes this file; it is a fixed,
	    # shared path, so concurrent sessions may still collide here - confirm
	    # in utils.
	    st.audio("audio_story.wav")


	# Script entry point: build the app page when this file is run directly.
	if __name__ == "__main__":
	    main()

	# Credits footer; these statements follow the entry-point guard, so the
	# footer is emitted after main() has run when the file is executed.
	_CREDITS_TEXT = '''
	Made with ❤️ by @Aditya-Neural-Net-Ninja\n
	Utilizes Image-to-Text, Text Generation, Text-to-Speech Transformer Models\n
	Gratitude to Streamlit, 🤗 Spaces for Deployment & Hosting
	'''
	st.markdown("### Credits")
	st.caption(_CREDITS_TEXT)