Spaces:
Sleeping
Sleeping
File size: 7,761 Bytes
9fe6b65 613b43a 9fe6b65 613b43a 9fe6b65 613b43a 9fe6b65 613b43a 9fe6b65 613b43a 9fe6b65 613b43a 9fe6b65 613b43a 9fe6b65 613b43a 9fe6b65 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 |
import streamlit as st
import requests
import os
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast, VitsModel, AutoTokenizer
import torch
import soundfile as sf
# API keys for other features (optional)
# Each external service reads its token from its own environment variable.
# An unset variable yields None, which the f-strings below render as the
# literal text "Bearer None".
Image_Token = os.environ.get('Image_generation')
Content_Token = os.environ.get('ContentGeneration')
Image_prompt_token = os.environ.get('Prompt_generation')

# API Headers for external services (optional)
Image_generation = dict(Authorization=f"Bearer {Image_Token}")
Content_generation = {
    "Authorization": f"Bearer {Content_Token}",
    "Content-Type": "application/json",
}
Image_Prompt = {
    "Authorization": f"Bearer {Image_prompt_token}",
    "Content-Type": "application/json",
}

# Text-to-Image Model API URLs
# Every endpoint is the shared inference root followed by the model's repo id,
# so the table is derived from the list of repo ids.
_HF_INFERENCE_ROOT = "https://api-inference.huggingface.co/models/"
image_generation_urls = {
    repo_id: _HF_INFERENCE_ROOT + repo_id
    for repo_id in (
        "black-forest-labs/FLUX.1-schnell",
        "CompVis/stable-diffusion-v1-4",
        "black-forest-labs/FLUX.1-dev",
    )
}

# Default content generation model
# Display name and API model id are the same string for every entry; the
# first entry is the one the sidebar selects by default (index=0).
content_models = {
    model_id: model_id
    for model_id in (
        "llama-3.1-70b-versatile",
        "llama3-8b-8192",
        "gemma2-9b-it",
        "mixtral-8x7b-32768",
    )
}
# Load the translation model and tokenizer locally
@st.cache_resource
def load_translation_model():
    """Load the mBART-50 many-to-one translation model and its tokenizer.

    Both objects come from the same Hugging Face checkpoint. The function is
    wrapped in ``st.cache_resource`` so Streamlit can reuse the loaded
    objects instead of loading them again on each call.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    checkpoint = "facebook/mbart-large-50-many-to-one-mmt"
    translator = MBartForConditionalGeneration.from_pretrained(checkpoint)
    mbart_tokenizer = MBart50TokenizerFast.from_pretrained(checkpoint)
    return translator, mbart_tokenizer
# Function to perform translation locally
def translate_text_local(text, src_lang="ta_IN"):
    """Translate ``text`` into English with the locally loaded mBART-50 model.

    Args:
        text: Source-language text to translate. Input longer than 512
            tokens is truncated by the tokenizer.
        src_lang: mBART-50 language code of ``text``. Defaults to Tamil
            (``"ta_IN"``), the language this app asks the user to enter.

    Returns:
        The English translation, or an empty string when ``text`` is empty
        or only whitespace.
    """
    # Nothing to translate: skip loading and running the model entirely.
    if not text or not text.strip():
        return ""

    model, tokenizer = load_translation_model()

    # mBART-50 marks the input with a source-language token. Without this
    # assignment the tokenizer keeps its default source language rather than
    # the language the user actually typed.
    # NOTE(review): the tokenizer is a cached, shared object; it is set here
    # before every use, but concurrent sessions using different languages
    # could race — confirm if more source languages are added.
    tokenizer.src_lang = src_lang

    inputs = tokenizer(text, return_tensors="pt", max_length=512, truncation=True)
    translated_tokens = model.generate(
        **inputs,
        forced_bos_token_id=tokenizer.lang_code_to_id["en_XX"],
    )
    # A single input string yields a batch of one; return its decoded text.
    return tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
# Function to query Groq content generation model (optional)
def generate_content(english_text, max_tokens, temperature, model):
    """Ask the Groq chat-completions API for educational content on a topic.

    Args:
        english_text: Topic to write about (inserted into the user prompt).
        max_tokens: Token budget; sent as ``max_tokens`` and also mentioned
            in the prompt text.
        temperature: Sampling temperature forwarded to the API.
        model: Model identifier forwarded to the API.

    Returns:
        The generated text, or ``None`` when the request fails, the service
        answers with a non-200 status, or the response body does not have the
        expected shape. Failures are reported to the page via ``st.error``.
    """
    url = "https://api.groq.com/openai/v1/chat/completions"
    request_timeout_s = 60  # without a timeout a stalled connection blocks forever
    payload = {
        "model": model,
        "messages": [
            {"role": "system", "content": "You are a creative and insightful writer."},
            {"role": "user", "content": f"Write educational content about {english_text} within {max_tokens} tokens."},
        ],
        "max_tokens": max_tokens,
        "temperature": temperature,
    }

    try:
        response = requests.post(
            url,
            json=payload,
            headers=Content_generation,
            timeout=request_timeout_s,
        )
    except requests.RequestException as exc:
        # Connection errors and timeouts are shown to the user instead of
        # surfacing as an unhandled exception.
        st.error(f"Content Generation Error: {exc}")
        return None

    if response.status_code != 200:
        st.error(f"Content Generation Error: {response.status_code}")
        return None

    try:
        return response.json()['choices'][0]['message']['content']
    except (ValueError, KeyError, IndexError, TypeError):
        # Body was not JSON, or lacked choices[0].message.content.
        st.error("Content Generation Error: unexpected response format")
        return None
# Function to generate image prompt (optional)
def generate_image_prompt(english_text):
    """Ask the Groq chat-completions API for a short text-to-image prompt.

    Args:
        english_text: Topic the image prompt should describe.

    Returns:
        The generated prompt text, or ``None`` when the request fails, the
        service answers with a non-200 status, or the response body does not
        have the expected shape. Failures are reported via ``st.error``.
    """
    url = "https://api.groq.com/openai/v1/chat/completions"
    request_timeout_s = 60  # without a timeout a stalled connection blocks forever
    payload = {
        "model": "mixtral-8x7b-32768",
        "messages": [
            {"role": "system", "content": "You are a professional Text to image prompt generator."},
            {"role": "user", "content": f"Create a text to image generation prompt about {english_text} within 30 tokens."},
        ],
        "max_tokens": 30,
    }

    try:
        response = requests.post(
            url,
            json=payload,
            headers=Image_Prompt,
            timeout=request_timeout_s,
        )
    except requests.RequestException as exc:
        # Connection errors and timeouts are shown to the user instead of
        # surfacing as an unhandled exception.
        st.error(f"Prompt Generation Error: {exc}")
        return None

    if response.status_code != 200:
        st.error(f"Prompt Generation Error: {response.status_code}")
        return None

    try:
        return response.json()['choices'][0]['message']['content']
    except (ValueError, KeyError, IndexError, TypeError):
        # Body was not JSON, or lacked choices[0].message.content.
        st.error("Prompt Generation Error: unexpected response format")
        return None
# Function to generate an image from the prompt (optional)
def generate_image(image_prompt, model_url):
    """Request an image for ``image_prompt`` from a text-to-image endpoint.

    Args:
        image_prompt: Prompt text sent as the ``inputs`` field. When it is
            empty or ``None`` (for example because prompt generation failed)
            no request is made.
        model_url: Endpoint URL to POST to, e.g. one of the values of
            ``image_generation_urls``.

    Returns:
        The raw response body (``response.content``) on HTTP 200, otherwise
        ``None``. Request failures and non-200 statuses are reported via
        ``st.error``.
    """
    # No prompt means there is nothing meaningful to send; the step that
    # produced the prompt is responsible for reporting why it is missing.
    if not image_prompt:
        return None

    request_timeout_s = 120  # generous limit, but never wait indefinitely
    data = {"inputs": image_prompt}

    try:
        response = requests.post(
            model_url,
            headers=Image_generation,
            json=data,
            timeout=request_timeout_s,
        )
    except requests.RequestException as exc:
        # Connection errors and timeouts are shown to the user instead of
        # surfacing as an unhandled exception.
        st.error(f"Image Generation Error: {exc}")
        return None

    if response.status_code == 200:
        return response.content

    st.error(f"Image Generation Error {response.status_code}: {response.text}")
    return None
# Cached loader for the text-to-speech model, mirroring load_translation_model
@st.cache_resource
def _load_tts_model():
    """Load the MMS English TTS model and its tokenizer.

    Wrapped in ``st.cache_resource`` so the checkpoint is not loaded again
    on every speech request.
    """
    checkpoint = "facebook/mms-tts-eng"
    model = VitsModel.from_pretrained(checkpoint)
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    return model, tokenizer


# New Function to generate speech from text using VitsModel
def generate_speech(text, audio_path="output.wav"):
    """Synthesize ``text`` to speech and write it to an audio file.

    Args:
        text: English text to speak.
        audio_path: Destination file path. Defaults to ``"output.wav"`` in
            the current working directory; pass a distinct path if several
            callers may synthesize at the same time, since they would
            otherwise overwrite each other's file.

    Returns:
        ``audio_path``, the path the waveform was written to.
    """
    model, tokenizer = _load_tts_model()
    inputs = tokenizer(text, return_tensors="pt")

    # Inference only: no gradients are needed for the forward pass.
    with torch.no_grad():
        output = model(**inputs).waveform

    # Write at the rate the model was configured for rather than a
    # hard-coded number, so the file header always matches the samples.
    sf.write(audio_path, output.numpy().flatten(), model.config.sampling_rate)
    return audio_path
# User Guide Section
def show_user_guide():
    """Render the "User Guide" page: a title followed by the guide text."""
    # NOTE(review): the guide body is the placeholder text found in the
    # file, not real guide content — confirm whether the actual guide text
    # should be restored here.
    guide_body = "\n... [content unchanged] ...\n"
    st.title("FusionMind User Guide")
    st.write(guide_body)
# Main Streamlit app
def main():
    """Render the FusionMind page and run the generation pipeline.

    The sidebar switches between the main app and the user guide and holds
    the generation settings. On "Generate", the Tamil input is translated to
    English, then educational content, speech for that content, and an image
    are produced in turn. Each later stage is skipped when the stage it
    depends on produced nothing.
    """
    # Sidebar Menu
    st.sidebar.title("FusionMind Options")
    page = st.sidebar.radio("Select a page:", ["Main App", "User Guide"])
    if page == "User Guide":
        show_user_guide()
        return

    st.title("🅰️ℹ️ FusionMind ➡️ Multimodal")

    # Sidebar for temperature, token adjustment, and model selection
    st.sidebar.header("Settings")
    temperature = st.sidebar.slider("Select Temperature", 0.1, 1.0, 0.7)
    max_tokens = st.sidebar.slider("Max Tokens for Content Generation", 100, 400, 200)
    # Content generation model selection
    content_model = st.sidebar.selectbox("Select Content Generation Model", list(content_models.keys()), index=0)
    # Image generation model selection
    image_model = st.sidebar.selectbox("Select Image Generation Model", list(image_generation_urls.keys()), index=0)

    # Suggested inputs
    st.write("## Suggested Inputs")
    suggestions = ["தரவு அறிவியல்", "உளவியல்", "ராக்கெட் எப்படி வேலை செய்கிறது"]
    selected_suggestion = st.selectbox("Select a suggestion or enter your own:", [""] + suggestions)
    # Input box for user; the chosen suggestion pre-fills it
    tamil_input = st.text_input("Enter Tamil text (or select a suggestion):", selected_suggestion)

    # Guard clauses: nothing to do until the button is pressed with some input.
    if not st.button("Generate"):
        return
    if not tamil_input:
        return

    # Step 1: Translation (Tamil to English)
    st.write("### Translated English Text:")
    english_text = translate_text_local(tamil_input)
    if not english_text:
        return
    st.success(english_text)

    # Step 2: Generate Educational Content
    st.write("### Generated Content:")
    with st.spinner('Generating content...'):
        content_output = generate_content(english_text, max_tokens, temperature, content_models[content_model])

    if content_output:
        st.success(content_output)

        # Step 3: Generate speech from the content
        st.write("### Generated Speech:")
        with st.spinner('Generating speech...'):
            audio_path = generate_speech(content_output)
        # Read through a context manager so the file handle is always closed.
        with open(audio_path, 'rb') as audio_file:
            audio_bytes = audio_file.read()
        st.audio(audio_bytes, format="audio/wav")

    # Step 4: Generate Image from the prompt (optional)
    st.write("### Generated Image:")
    with st.spinner('Generating image...'):
        image_prompt = generate_image_prompt(english_text)
        # Only call the image service when a prompt was actually produced;
        # a failed prompt step has already reported its own error.
        image_data = None
        if image_prompt:
            image_data = generate_image(image_prompt, image_generation_urls[image_model])
    if image_data:
        st.image(image_data, caption="Generated Image")
# Entry point: build the page only when this file is run as the main script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|