# NOTE: removed non-Python scraper artifacts (file size, git-blame hashes,
# line-number gutter) that were pasted above the source and broke parsing.
import json
import os

import gradio as gr
import numpy as np
import spaces
from PIL import Image
from scipy.io import wavfile
from transformers import pipeline
# Model setup: three transformers pipelines loaded once at import time.
# Text Expansion Model (use Mistral or LLaMA on ZeroGPU)
# NOTE(review): the Hub id is usually versioned (e.g.
# "mistralai/Mistral-7B-Instruct-v0.2") — confirm this unversioned id resolves.
text_generator = pipeline("text-generation", model="mistralai/Mistral-7B-Instruct")
# Text-to-Speech Model (Bark small)
tts_pipeline = pipeline("text-to-speech", model="suno/bark-small")
# Image Generation Model (LoRA-based)
# NOTE(review): "text-to-image" is not a transformers pipeline task — image
# generation with sdxl-turbo normally goes through diffusers
# (AutoPipelineForText2Image). This line likely raises at load time; verify.
image_generator = pipeline("text-to-image", model="stabilityai/sdxl-turbo")
# Main Processing Function
@spaces.GPU
def generate_cartoon(script_text):
    """Expand a story into scenes, then render one background image and one
    TTS audio clip per scene.

    Args:
        script_text: Raw cartoon story text entered by the user.

    Returns:
        A list of ``(image_path, audio_path)`` tuples, one per scene, or an
        error string when the model output cannot be parsed as JSON.
    """
    # Step 1: Expand Script.
    # Bug fix: the original closed the f-string before {script_text}, leaving
    # the story and the "Return result in JSON format." instruction as stray
    # statements — the model never saw them. Keep everything in one f-string.
    prompt = f"""
You are a cartoon script writer. Convert the following story into a detailed cartoon scene plan.
For each scene, provide:
1. Scene description (setting + action)
2. Dialogue
3. Characters involved
4. Background description
Story:
{script_text}
Return result in JSON format.
"""
    response = text_generator(prompt, max_new_tokens=1024)[0]['generated_text']

    # text-generation pipelines echo the prompt in 'generated_text', so
    # json.loads on the whole string would always fail. Parse only the span
    # from the first '{' to the last '}'.
    start = response.find("{")
    end = response.rfind("}")
    if start == -1 or end <= start:
        return "Script expansion failed. Please refine input."
    try:
        scene_data = json.loads(response[start:end + 1])
    except json.JSONDecodeError:  # was a bare except:, which hid real errors
        return "Script expansion failed. Please refine input."

    os.makedirs("generated_images", exist_ok=True)
    os.makedirs("generated_audio", exist_ok=True)

    scene_results = []
    for idx, scene in enumerate(scene_data.get("scenes", [])):
        # Step 2: Generate Background Image.
        background_prompt = scene.get("background_description", "cartoon background")
        background_image = image_generator(background_prompt).images[0]
        bg_path = f"generated_images/scene_{idx + 1}.png"
        background_image.save(bg_path)

        # Step 3: Generate TTS Audio. transformers TTS pipelines return
        # {'audio': np.ndarray, 'sampling_rate': int}; a numpy array has no
        # .export() (that is a pydub API), so write the WAV with scipy.
        dialogue = scene.get("dialogue", "")
        audio_output = tts_pipeline(dialogue)
        audio_path = f"generated_audio/scene_{idx + 1}.wav"
        wavfile.write(
            audio_path,
            audio_output["sampling_rate"],
            np.asarray(audio_output["audio"]).squeeze(),
        )

        scene_results.append((bg_path, audio_path))
    return scene_results
# Gradio UI wiring: one textbox in, a gallery of generated scene assets out.
script_box = gr.Textbox(label="Enter Cartoon Script", lines=10)
scene_gallery = gr.Gallery(label="Generated Scenes with Audio")

demo = gr.Interface(
    fn=generate_cartoon,
    inputs=script_box,
    outputs=scene_gallery,
    title="Cartoon Scene Generator",
    description="Enter a cartoon story script to generate scenes with background images and audio. Video merging should be done separately in Google Colab.",
)

# Launch only when executed as a script, not when imported.
if __name__ == "__main__":
    demo.launch()
|