upload
- app.py +114 -7
- requirements.txt +3 -1
app.py
CHANGED
@@ -1,13 +1,120 @@
import gradio as gr
from transformers import pipeline
import requests
import random
import html
import base64
import os
import json
from datetime import datetime
from moviepy.editor import VideoFileClip, AudioFileClip, CompositeVideoClip, TextClip, concatenate_audioclips

chat_gen = pipeline("text-generation", model="mistralai/Mistral-7B-Instruct-v0.2")

# Static voice options (display name -> ElevenLabs voice ID)
voice_options = {
    "Rachel": "EXAVITQu4vr4xnSDxMaL",
    "Adam": "21m00Tcm4TlvDq8ikWAM",
    "Elli": "AZnzlk1XvdvUeBnXmlld",
    "Josh": "VR6AewLTigWG4xSOukaG"
}

library_file = "/tmp/library.json"

def generate_with_voices(prompt, video, *character_inputs):
    # character_inputs arrive as separate Textbox values in "Name:Voice" form.
    try:
        characters = [c.split(":")[0].strip() for c in character_inputs if ":" in c]
        voices = {c.split(":")[0].strip(): c.split(":")[1].strip() for c in character_inputs if ":" in c}

        system_prompt = (
            f"Group chat between {', '.join(characters)}. Prompt: {prompt}. "
            "Use casual format like: Name: message"
        )
        response = chat_gen(system_prompt, max_new_tokens=400)[0]["generated_text"]
        chat_lines = [
            (line.split(":", 1)[0].strip(), line.split(":", 1)[1].strip())
            for line in response.splitlines() if ":" in line
        ]

        audio_paths = []
        chat_overlay_text = []

        for i, (name, msg) in enumerate(chat_lines):
            # Map the character's chosen voice name to its ElevenLabs voice ID,
            # falling back to the first voice if the name is unknown.
            voice_name = voices.get(name, "")
            voice_id = voice_options.get(voice_name, list(voice_options.values())[0])
            eleven_response = requests.post(
                f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}",
                headers={
                    "xi-api-key": os.environ.get("ELEVEN_API_KEY", "demo-key"),
                    "Content-Type": "application/json"
                },
                json={"text": msg, "model_id": "eleven_monolingual_v1"}
            )
            if eleven_response.status_code == 200:
                file_path = f"/tmp/audio_{i}.mp3"
                with open(file_path, "wb") as f:
                    f.write(eleven_response.content)
                audio_paths.append(file_path)

            chat_overlay_text.append(f"{name}: {msg}")

        # Stitch the per-message clips into a single audio track.
        final_audio = concatenate_audioclips([AudioFileClip(a) for a in audio_paths])

        # gr.File passes a tempfile-like object in some Gradio versions and a plain
        # path in others, so accept both.
        video_path = video.name if hasattr(video, "name") else str(video)
        video_clip = VideoFileClip(video_path).resize(height=480)
        video_clip = video_clip.set_audio(final_audio)

        # Overlay the chat transcript on the video.
        txt = "\n".join(chat_overlay_text)
        txt_clip = TextClip(txt, fontsize=24, color="black", bg_color="white", font="Courier").set_duration(video_clip.duration)
        txt_clip = txt_clip.set_position((20, 20))

        final = CompositeVideoClip([video_clip, txt_clip])
        timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
        out_path = f"/tmp/fakechat_{timestamp}.mp4"
        final.write_videofile(out_path, codec="libx264", audio_codec="aac")

        # Save to library
        if os.path.exists(library_file):
            with open(library_file, "r") as f:
                library = json.load(f)
        else:
            library = []

        library.insert(0, {
            "timestamp": timestamp,
            "characters": characters,
            "prompt": prompt,
            "path": out_path
        })

        with open(library_file, "w") as f:
            json.dump(library, f, indent=2)

        return out_path
    except Exception as e:
        return f"Error: {str(e)}"

def get_library():
    if not os.path.exists(library_file):
        return "No stories yet."
    with open(library_file, "r") as f:
        library = json.load(f)
    html_items = [
        f"<li><a href='file={item['path']}' target='_blank'>{item['timestamp']} — {', '.join(item['characters'])}</a></li>"
        for item in library
    ]
    return f"<ul>{''.join(html_items)}</ul>"

def build_voice_inputs():
    return [gr.Textbox(label=f"Character {i+1} (Format: Name:Voice)", placeholder="e.g. Anna:Rachel") for i in range(4)]

with gr.Blocks() as app:
    with gr.Row():
        with gr.Column(scale=3):
            character_inputs = build_voice_inputs()
            prompt = gr.Textbox(label="Scene Prompt")
            video = gr.File(label="Upload Background Video (.mp4)", file_types=[".mp4"])
            generate_btn = gr.Button("Generate Story")
            output = gr.File(label="Download MP4 Story")
        with gr.Column(scale=1):
            library_html = gr.HTML(label="My Saved Stories")
            refresh_btn = gr.Button("🔁 Refresh Library")

    # Gradio expects a flat list of input components, so the character textboxes
    # are unpacked alongside the prompt and video inputs.
    generate_btn.click(fn=generate_with_voices, inputs=[prompt, video] + character_inputs, outputs=output)
    refresh_btn.click(fn=get_library, inputs=[], outputs=library_html)

app.launch()
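With the functions above defined, the handler can also be exercised without the UI. The following is a minimal smoke-test sketch: the prompt, the video path, and the Name:Voice pairs are placeholder values, and it assumes ELEVEN_API_KEY is set in the environment.

# Hypothetical smoke test; placeholder prompt, path, and Name:Voice pairs.
result = generate_with_voices(
    "Two friends plan a surprise party",   # scene prompt
    "/tmp/background.mp4",                 # any short local .mp4 (placeholder path)
    "Anna:Rachel", "Ben:Josh",             # character inputs in Name:Voice form
)
print(result)  # path to the rendered video, or an "Error: ..." string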
requirements.txt
CHANGED
@@ -1,3 +1,5 @@
transformers
gradio
torch
requests
moviepy
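One caveat on the new dependencies: app.py imports from moviepy.editor, which exists only in the moviepy 1.x series, so the import fails if pip resolves moviepy 2.x. A pin along these lines (a suggestion, not something the commit adds) may be safer:

moviepy<2.0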