import gradio as gr
from transformers import pipeline
import requests
import os
import json
from datetime import datetime
# moviepy 1.x style imports (the moviepy.editor module was removed in moviepy 2.x)
from moviepy.editor import VideoFileClip, AudioFileClip, CompositeVideoClip, TextClip, concatenate_audioclips

# Local text-generation model used to script the group chat (loading a 7B model needs substantial RAM/GPU memory)
chat_gen = pipeline("text-generation", model="mistralai/Mistral-7B-Instruct-v0.2")

# Static voice options: display name -> ElevenLabs voice ID
voice_options = {
    "Rachel": "EXAVITQu4vr4xnSDxMaL",
    "Adam": "21m00Tcm4TlvDq8ikWAM",
    "Elli": "AZnzlk1XvdvUeBnXmlld",
    "Josh": "VR6AewLTigWG4xSOukaG"
}

# On-disk index of generated stories (note: /tmp is typically not persistent across restarts)
library_file = "/tmp/library.json"

def generate_with_voices(prompt, video, *character_inputs):
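    """Script a group chat with the LLM, voice each line with ElevenLabs,
    and overlay the transcript on the uploaded background video."""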
    try:
        # Each character box is "Name:Voice"; skip blank or malformed entries.
        pairs = [c.split(":", 1) for c in character_inputs if c and ":" in c]
        characters = [name.strip() for name, _ in pairs]
        voices = {name.strip(): voice.strip() for name, voice in pairs}

        system_prompt = (
            f"Group chat between {', '.join(characters)}. Prompt: {prompt}. "
            "Use casual format like: Name: message"
        )
        # return_full_text=False keeps the prompt itself out of the parsed script.
        response = chat_gen(system_prompt, max_new_tokens=400, return_full_text=False)[0]["generated_text"]
        # Keep only lines shaped like "Name: message".
        chat_lines = []
        for line in response.splitlines():
            if ":" in line:
                name, msg = line.split(":", 1)
                chat_lines.append((name.strip(), msg.strip()))

        audio_paths = []
        chat_overlay_text = []

        for i, (name, msg) in enumerate(chat_lines):
            # Map the character's assigned voice *name* (e.g. "Rachel") to its ElevenLabs
            # voice ID, falling back to the first configured voice if the name is unknown.
            voice_id = voice_options.get(voices.get(name), list(voice_options.values())[0])
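            # One text-to-speech request per chat line; failed requests are simply skipped.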
            eleven_response = requests.post(
                f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}",
                headers={
                    "xi-api-key": os.environ.get("ELEVEN_API_KEY", "demo-key"),
                    "Content-Type": "application/json"
                },
                json={ "text": msg, "model_id": "eleven_monolingual_v1" }
            )
            if eleven_response.status_code == 200:
                file_path = f"/tmp/audio_{i}.mp3"
                with open(file_path, 'wb') as f:
                    f.write(eleven_response.content)
                audio_paths.append(file_path)

            chat_overlay_text.append(f"{name}: {msg}")

        if not audio_paths:
            return "Error: no audio was generated (check ELEVEN_API_KEY)."

        # AudioFileClip has no append(); concatenate the clips back to back instead.
        final_audio = concatenate_audioclips([AudioFileClip(a) for a in audio_paths])

        # gr.File may return a path string or a tempfile wrapper depending on the Gradio version.
        video_path = video if isinstance(video, str) else video.name
        video_clip = VideoFileClip(video_path).resize(height=480)
        video_clip = video_clip.set_audio(final_audio)

        txt = "\n".join(chat_overlay_text)
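        # NOTE: TextClip requires ImageMagick to be installed on the host.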
        txt_clip = TextClip(txt, fontsize=24, color='black', bg_color='white', font="Courier").set_duration(video_clip.duration)
        txt_clip = txt_clip.set_position((20, 20))

        final = CompositeVideoClip([video_clip, txt_clip])
        timestamp = datetime.now().strftime('%Y%m%d-%H%M%S')
        out_path = f"/tmp/fakechat_{timestamp}.mp4"
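        # H.264 video + AAC audio keeps the MP4 widely playable (browsers, phones, etc.).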
        final.write_videofile(out_path, codec="libx264", audio_codec="aac")

        # Save to library
        if os.path.exists(library_file):
            with open(library_file, 'r') as f:
                library = json.load(f)
        else:
            library = []

        library.insert(0, {
            "timestamp": timestamp,
            "characters": characters,
            "prompt": prompt,
            "path": out_path
        })

        with open(library_file, 'w') as f:
            json.dump(library, f, indent=2)

        return out_path
    except Exception as e:
        return f"Error: {str(e)}"

def get_library():
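    """Render the saved-stories index as an HTML list of download links."""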
    if not os.path.exists(library_file):
        return "No stories yet."
    with open(library_file, 'r') as f:
        library = json.load(f)
    html_items = [
        f"<li><a href='file={item['path']}' target='_blank'>"
        f"{item['timestamp']}: {', '.join(item['characters'])}</a></li>"
        for item in library
    ]
    return f"<ul>{''.join(html_items)}</ul>"

def build_voice_inputs():
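    """Four free-text boxes, each expecting a "Name:Voice" pair (e.g. "Anna:Rachel")."""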
    return [gr.Textbox(label=f"Character {i+1} (Format: Name:Voice)", placeholder="e.g. Anna:Rachel") for i in range(4)]

with gr.Blocks() as app:
    with gr.Row():
        with gr.Column(scale=3):
            character_inputs = build_voice_inputs()
            prompt = gr.Textbox(label="Scene Prompt")
            video = gr.File(label="Upload Background Video (.mp4)", file_types=[".mp4"])
            generate_btn = gr.Button("Generate Story")
            output = gr.File(label="Download MP4 Story")
        with gr.Column(scale=1):
            library_html = gr.HTML(label="My Saved Stories")
            refresh_btn = gr.Button("🔁 Refresh Library")

    # Pass each of the four character textboxes to the handler as a separate input.
    generate_btn.click(fn=generate_with_voices, inputs=[prompt, video, *character_inputs], outputs=output)
    refresh_btn.click(fn=get_library, inputs=[], outputs=library_html)

app.launch()