# fakeaitext / app.py
# (Hugging Face Space file header — uploaded by "aigorithm", commit c19f44b, 4.61 kB.
#  Converted to a comment so the module parses; this text was web-page chrome,
#  not code.)
import base64
import html
import json
import os
import random
from datetime import datetime

import gradio as gr
import requests
from moviepy.editor import (
    AudioFileClip,
    CompositeVideoClip,
    TextClip,
    VideoFileClip,
    concatenate_audioclips,
)
from transformers import pipeline
chat_gen = pipeline("text-generation", model="mistralai/Mistral-7B-Instruct-v0.2")
# Static voice options
voice_options = {
"Rachel": "EXAVITQu4vr4xnSDxMaL",
"Adam": "21m00Tcm4TlvDq8ikWAM",
"Elli": "AZnzlk1XvdvUeBnXmlld",
"Josh": "VR6AewLTigWG4xSOukaG"
}
library_file = "/tmp/library.json"
def generate_with_voices(prompt, video, character_inputs):
try:
characters = [c.split(":")[0].strip() for c in character_inputs if ":" in c]
voices = {c.split(":")[0].strip(): c.split(":")[1].strip() for c in character_inputs if ":" in c}
system_prompt = (
f"Group chat between {', '.join(characters)}. Prompt: {prompt}. "
"Use casual format like: Name: message"
)
response = chat_gen(system_prompt, max_new_tokens=400)[0]["generated_text"]
chat_lines = [(line.split(':', 1)[0].strip(), line.split(':', 1)[1].strip()) for line in response.splitlines() if ':' in line]
audio_paths = []
chat_overlay_text = []
for i, (name, msg) in enumerate(chat_lines):
voice_id = voices.get(name, list(voice_options.values())[0])
eleven_response = requests.post(
f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}",
headers={
"xi-api-key": os.environ.get("ELEVEN_API_KEY", "demo-key"),
"Content-Type": "application/json"
},
json={ "text": msg, "model_id": "eleven_monolingual_v1" }
)
if eleven_response.status_code == 200:
file_path = f"/tmp/audio_{i}.mp3"
with open(file_path, 'wb') as f:
f.write(eleven_response.content)
audio_paths.append(file_path)
chat_overlay_text.append(f"{name}: {msg}")
final_audio = AudioFileClip(audio_paths[0])
for a in audio_paths[1:]:
final_audio = final_audio.append(AudioFileClip(a))
video_clip = VideoFileClip(video.name).resize(height=480)
video_clip = video_clip.set_audio(final_audio)
txt = "\n".join(chat_overlay_text)
txt_clip = TextClip(txt, fontsize=24, color='black', bg_color='white', font="Courier").set_duration(video_clip.duration)
txt_clip = txt_clip.set_position((20, 20))
final = CompositeVideoClip([video_clip, txt_clip])
timestamp = datetime.now().strftime('%Y%m%d-%H%M%S')
out_path = f"/tmp/fakechat_{timestamp}.mp4"
final.write_videofile(out_path, codec="libx264", audio_codec="aac")
# Save to library
if os.path.exists(library_file):
with open(library_file, 'r') as f:
library = json.load(f)
else:
library = []
library.insert(0, {
"timestamp": timestamp,
"characters": characters,
"prompt": prompt,
"path": out_path
})
with open(library_file, 'w') as f:
json.dump(library, f, indent=2)
return out_path
except Exception as e:
return f"Error: {str(e)}"
def get_library():
if not os.path.exists(library_file):
return "No stories yet."
with open(library_file, 'r') as f:
library = json.load(f)
html_items = [f"<li><a href='file={item['path']}' target='_blank'>{item['timestamp']} β€” {', '.join(item['characters'])}</a></li>" for item in library]
return f"<ul>{''.join(html_items)}</ul>"
def build_voice_inputs():
return [gr.Textbox(label=f"Character {i+1} (Format: Name:Voice)", placeholder="e.g. Anna:Rachel") for i in range(4)]
with gr.Blocks() as app:
with gr.Row():
with gr.Column(scale=3):
character_inputs = build_voice_inputs()
prompt = gr.Textbox(label="Scene Prompt")
video = gr.File(label="Upload Background Video (.mp4)", file_types=[".mp4"])
generate_btn = gr.Button("Generate Story")
output = gr.File(label="Download MP4 Story")
with gr.Column(scale=1):
library_html = gr.HTML(label="My Saved Stories")
refresh_btn = gr.Button("πŸ” Refresh Library")
generate_btn.click(fn=generate_with_voices, inputs=[prompt, video, character_inputs], outputs=output)
refresh_btn.click(fn=get_library, inputs=[], outputs=library_html)
app.launch()