File size: 2,381 Bytes
59be1d1
14b3fec
 
7fe7d39
14b3fec
 
76217ba
55ad485
14b3fec
 
76217ba
14b3fec
 
55ad485
14b3fec
 
0d40aa7
14b3fec
c88a35f
14b3fec
 
 
 
 
 
 
 
 
adde61a
14b3fec
 
 
 
 
 
71df4c0
14b3fec
 
 
 
 
 
 
 
 
 
71df4c0
 
14b3fec
 
 
 
 
 
 
 
 
 
 
 
 
71df4c0
76217ba
71df4c0
c88a35f
14b3fec
 
 
 
71df4c0
 
 
14b3fec
59be1d1
14b3fec
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import json
import os
import wave

import gradio as gr
import numpy as np
import spaces
from PIL import Image
from transformers import pipeline

# Script expansion: instruction-tuned Mistral text-generation pipeline.
# NOTE(review): confirm this repo id resolves on the Hub; published Mistral
# instruct checkpoints appear to carry a version suffix.
text_generator = pipeline(
    task="text-generation",
    model="mistralai/Mistral-7B-Instruct",
)

# Speech synthesis: Bark (small checkpoint).
tts_pipeline = pipeline(
    task="text-to-speech",
    model="suno/bark-small",
)

# Background art: SDXL-Turbo.
# NOTE(review): confirm that "text-to-image" is a task transformers.pipeline
# supports; SDXL-Turbo is normally loaded through the diffusers library.
image_generator = pipeline(
    task="text-to-image",
    model="stabilityai/sdxl-turbo",
)

# Main Processing Function
def _build_prompt(script_text):
    """Return the scene-planning prompt with the user's story embedded."""
    return (
        "You are a cartoon script writer. Convert the following story into a "
        "detailed cartoon scene plan.\n"
        "For each scene, provide:\n"
        "1. Scene description (setting + action)\n"
        "2. Dialogue\n"
        "3. Characters involved\n"
        "4. Background description\n"
        "\n"
        "Story:\n"
        f'"""\n{script_text}\n"""\n'
        "Return result in JSON format.\n"
        # Spell out the keys that generate_cartoon reads back from the plan.
        'Respond with only a JSON object shaped like {"scenes": '
        '[{"scene_description": "...", "dialogue": "...", '
        '"characters": ["..."], "background_description": "..."}]}.\n'
    )


def _parse_scenes(raw_text):
    """Extract the list of scene dicts from the model's raw text output.

    The model may surround the JSON with prose or code fences, so the widest
    ``{...}`` span is tried first and the widest ``[...]`` span second.

    Raises:
        ValueError: if no parsable JSON object or array is found.
    """
    plan = None
    for opener, closer in (("{", "}"), ("[", "]")):
        start = raw_text.find(opener)
        end = raw_text.rfind(closer)
        if start == -1 or end < start:
            continue
        try:
            plan = json.loads(raw_text[start:end + 1])
        except json.JSONDecodeError:
            continue
        break
    if plan is None:
        raise ValueError("no JSON object or array found in model output")

    # Accept either {"scenes": [...]} or a bare top-level list of scenes.
    scenes = plan.get("scenes", []) if isinstance(plan, dict) else plan
    if not isinstance(scenes, list):
        return []
    return [scene for scene in scenes if isinstance(scene, dict)]


def _dialogue_to_text(dialogue):
    """Flatten a scene's dialogue value (string, list, or other) to one string."""
    if dialogue is None:
        return ""
    if isinstance(dialogue, str):
        return dialogue.strip()
    if isinstance(dialogue, (list, tuple)):
        return " ".join(str(line) for line in dialogue).strip()
    return str(dialogue).strip()


def _save_wav(path, audio, sampling_rate):
    """Write float audio samples to ``path`` as 16-bit PCM WAV."""
    # assumes mono float samples in [-1, 1]; any leading batch/channel axis of
    # size 1 is flattened away -- TODO confirm for multi-channel models.
    samples = np.asarray(audio, dtype=np.float32).reshape(-1)
    pcm = (np.clip(samples, -1.0, 1.0) * 32767).astype("<i2")
    with wave.open(path, "wb") as wav_file:
        wav_file.setnchannels(1)
        wav_file.setsampwidth(2)
        wav_file.setframerate(int(sampling_rate))
        wav_file.writeframes(pcm.tobytes())


@spaces.GPU
def generate_cartoon(script_text):
    """Expand a story into scenes and render an image and audio clip for each.

    Args:
        script_text: The story entered by the user.

    Returns:
        A list of ``(image_path, audio_path)`` tuples, one per scene, in scene
        order. ``audio_path`` is ``None`` for a scene without dialogue.

    Raises:
        gr.Error: if the script is empty or the model output cannot be parsed
            into a scene plan.
    """
    if not script_text or not script_text.strip():
        raise gr.Error("Please enter a cartoon script.")

    # Step 1: Expand Script
    # return_full_text=False drops the echoed prompt so only the completion
    # (the JSON plan) is parsed.
    response = text_generator(
        _build_prompt(script_text),
        max_new_tokens=1024,
        return_full_text=False,
    )[0]["generated_text"]

    try:
        scenes = _parse_scenes(response)
    except ValueError as err:
        raise gr.Error("Script expansion failed. Please refine input.") from err

    os.makedirs("generated_images", exist_ok=True)
    os.makedirs("generated_audio", exist_ok=True)

    scene_results = []

    for number, scene in enumerate(scenes, start=1):
        # Generate Background Image
        background_prompt = (
            scene.get("background_description") or "cartoon background"
        )
        # NOTE(review): `.images[0]` is the diffusers result shape; confirm it
        # matches whatever `image_generator` actually returns.
        background_image = image_generator(background_prompt).images[0]
        bg_path = f"generated_images/scene_{number}.png"
        background_image.save(bg_path)

        # Generate TTS Audio (skipped when the scene has nothing to say)
        audio_path = None
        dialogue = _dialogue_to_text(scene.get("dialogue", ""))
        if dialogue:
            audio_output = tts_pipeline(dialogue)
            audio_path = f"generated_audio/scene_{number}.wav"
            _save_wav(
                audio_path,
                audio_output["audio"],
                audio_output["sampling_rate"],
            )

        scene_results.append((bg_path, audio_path))

    return scene_results

# Gradio UI: one multi-line text input feeding generate_cartoon, with the
# returned scenes shown in a gallery.
script_input = gr.Textbox(label="Enter Cartoon Script", lines=10)
scene_gallery = gr.Gallery(label="Generated Scenes with Audio")

demo = gr.Interface(
    title="Cartoon Scene Generator",
    description="Enter a cartoon story script to generate scenes with background images and audio. Video merging should be done separately in Google Colab.",
    fn=generate_cartoon,
    inputs=script_input,
    outputs=scene_gallery,
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()