# NOTE: the original paste began with Hugging Face Spaces page residue
# ("Spaces: / Sleeping / Sleeping") — not Python code; removed.
import os | |
from PIL import Image | |
import google.generativeai as genai | |
import gradio as gr | |
from gtts import gTTS | |
from pydub import AudioSegment | |
import tempfile | |
# --- Google Generative AI setup ---------------------------------------------
# The API key is read from the environment; genai.configure accepts None, in
# which case later generate_content calls fail with an authentication error.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
genai.configure(api_key=GOOGLE_API_KEY)

MODEL_ID = "gemini-1.5-pro-latest"
model = genai.GenerativeModel(MODEL_ID)

# Prompt used when analyzing text or images for gender-based discrimination.
analysis_system_prompt = "You are an expert in gender studies. Analyze the following content for any signs of gender-based discrimination and suggest actionable advice."

# Prompt that turns an analysis result into Eva's solo-podcast script.
podcast_prompt = """You are Eva, a solo podcast host focusing on gender equality topics.
- Discuss real-life scenarios involving gender-based discrimination, provide insights, and offer solutions in a conversational, storytelling style.
- Based on the analyzed text, create an engaging solo podcast as if reading stories from different victims who send you their story.
- Introduce yourself as Eva.
- Keep the conversation within 30000 characters, with a lot of emotion.
- Use short sentences suitable for speech synthesis.
- Maintain an empathetic tone.
- Include filler words like 'äh' for a natural flow.
- Avoid background music or extra words.
"""

# Sampling parameters shared by every model call.
generation_config = genai.GenerationConfig(
    temperature=0.9,
    top_p=1.0,
    top_k=32,
    candidate_count=1,
    max_output_tokens=8192,
)

# Block even low-probability harmful content in all four harm categories.
_BLOCKED_CATEGORIES = (
    genai.types.HarmCategory.HARM_CATEGORY_HARASSMENT,
    genai.types.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
    genai.types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
    genai.types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
)
safety_settings = {
    category: genai.types.HarmBlockThreshold.BLOCK_LOW_AND_ABOVE
    for category in _BLOCKED_CATEGORIES
}
# Analyze text
def analyze_text(text):
    """Ask the model for a gender-discrimination analysis of *text*.

    Returns the model's text response, or a fallback message when no
    response object was produced.
    """
    full_prompt = f"{analysis_system_prompt}\nContent:\n{text}"
    response = model.generate_content(
        [full_prompt],
        generation_config=generation_config,
        safety_settings=safety_settings,
    )
    if not response:
        return "No response generated."
    return response.text
# Analyze image
def analyze_image(image: Image.Image) -> str:
    """Ask the model for a gender-discrimination analysis of *image*.

    The image is passed through preprocess_image() before being sent
    alongside the analysis prompt.
    """
    full_prompt = f"{analysis_system_prompt}\nAnalyze this image for any instances of gender-based discrimination."
    prepared = preprocess_image(image)
    response = model.generate_content(
        [full_prompt, prepared],
        generation_config=generation_config,
        safety_settings=safety_settings,
    )
    if not response:
        return "No response generated."
    return response.text
# Preprocess image by resizing
def preprocess_image(image):
    """Resize a PIL image to 512px wide, preserving aspect ratio.

    Bug fix: the original resized the image but then returned a hard-coded
    description *string*, so analyze_image() never actually sent any pixel
    data to the model. Return the resized image instead (and drop the
    incorrect ``-> str`` annotation that codified the bug).

    Args:
        image: a PIL.Image.Image (or any object exposing ``width``,
            ``height`` and ``resize((w, h))``).

    Returns:
        The resized image object produced by ``image.resize``.
    """
    # max(1, ...) guards against a zero-height result for extremely wide inputs.
    new_height = max(1, int(image.height * 512 / image.width))
    return image.resize((512, new_height))
# Generate podcast script
def generate_podcast_script(content):
    """Turn analyzed *content* into Eva's podcast script via the model.

    Falls back to a default line when the model yields no response.
    """
    full_prompt = f"{podcast_prompt}\nAnalyzed content:\n{content}"
    response = model.generate_content([full_prompt], generation_config=generation_config)
    if not response:
        return "Eva has no commentary at this time."
    return response.text
# Convert script to audio using gTTS
def text_to_speech(script):
    """Synthesize *script* into a single MP3 file and return its path.

    The script is split on '.' into sentence-sized chunks (gTTS handles
    short utterances better), each chunk is rendered to its own temp MP3
    with a 0.5 s trailing pause, and the chunks are concatenated.

    Fixes over the original:
    - temp-file handles are closed before gTTS/pydub reopen the path
      (required on Windows, and avoids fd leaks everywhere);
    - a chunk that fails to synthesize no longer leaks its temp file.

    Returns:
        Path of the combined MP3 (a delete=False temp file; the caller —
        here, Gradio — is responsible for it).
    """
    lines = [line.strip() for line in script.split(".") if line.strip()]
    audio_files = []
    for line in lines:
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3')
        temp_file.close()  # release the handle so gTTS/pydub can reopen the path
        try:
            tts = gTTS(text=line, lang='en', tld='com')  # 'com' -> American accent
            tts.save(temp_file.name)
            sound = AudioSegment.from_mp3(temp_file.name)
            sound += AudioSegment.silent(duration=500)  # 0.5 s pause after each sentence
            sound.export(temp_file.name, format="mp3")
            audio_files.append(temp_file.name)
        except Exception as e:
            print(f"Error generating audio for line '{line}': {e}")
            os.remove(temp_file.name)  # don't leave the failed chunk on disk
    combined_audio = AudioSegment.empty()
    for file in audio_files:
        combined_audio += AudioSegment.from_mp3(file)
        os.remove(file)  # clean up per-sentence temp files as we go
    output_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
    output_file.close()
    combined_audio.export(output_file.name, format="mp3")
    return output_file.name
# Generate and play podcast
def generate_and_play_podcast(content, content_type='text'):
    """Create Eva's podcast script from *content* and synthesize it to audio.

    ``content_type`` is accepted for interface compatibility but is not
    used by the current implementation.

    Returns the path of the generated MP3 file.
    """
    return text_to_speech(generate_podcast_script(content))
# Gradio interface setup
# Fix: `css_style` was referenced below but never defined anywhere in this
# script, so the app crashed with a NameError before launching. Define a
# minimal stylesheet for the animated logo here.
css_style = """
#logo { text-align: center; font-size: 3em; font-weight: bold; letter-spacing: 0.05em; }
#logo .letter { display: inline-block; }
"""

with gr.Blocks(css=css_style) as app:
    # Animated "JustEva" logo rendered as styled spans.
    gr.HTML("""
    <div id="logo">
        <span class="letter j">J</span>
        <span class="letter u">u</span>
        <span class="letter s">s</span>
        <span class="letter t">t</span>
        <span class="letter e">E</span>
        <span class="letter v">v</span>
        <span class="letter a">a</span>
    </div>
    """)
    gr.Markdown("<h1 style='text-align: center; color:#f0f0f0;'>Promotes Gender Equality in Every Conversation</h1>")

    # Tab 1: free-text analysis, with optional podcast narration of the result.
    with gr.Tab("Text Analysis"):
        text_input = gr.Textbox(label="Enter Text or Select an Example", placeholder="Type here or select an example...", lines=4)
        text_output = gr.Textbox(label="Analysis Output", lines=6)
        analyze_text_btn = gr.Button("Analyze Text")
        listen_podcast_btn = gr.Button("Listen to Eva")
        analyze_text_btn.click(analyze_text, inputs=text_input, outputs=text_output)
        listen_podcast_btn.click(generate_and_play_podcast, inputs=text_output, outputs=gr.Audio())

    # Tab 2: image analysis, same narration flow driven by the text result.
    with gr.Tab("Image Analysis"):
        image_input = gr.Image(label="Upload Image (e.g., screenshot, photos, etc.)", type="pil")
        image_output = gr.Textbox(label="Analysis Output", lines=6)
        analyze_image_btn = gr.Button("Analyze Image")
        listen_podcast_image_btn = gr.Button("Listen to Eva")
        analyze_image_btn.click(analyze_image, inputs=image_input, outputs=image_output)
        listen_podcast_image_btn.click(generate_and_play_podcast, inputs=image_output, outputs=gr.Audio())

app.launch()