|
import json
import os

import gradio as gr
import numpy as np
import scipy.io.wavfile
import spaces
from PIL import Image
from transformers import pipeline
|
|
|
|
|
# LLM used to expand the user's story into a structured scene plan (JSON).
# NOTE(review): "mistralai/Mistral-7B-Instruct" looks un-versioned — the Hub
# repos are e.g. "Mistral-7B-Instruct-v0.2"; confirm the intended revision.
text_generator = pipeline("text-generation", model="mistralai/Mistral-7B-Instruct")

# Text-to-speech model used to voice each scene's dialogue; returns a dict
# with "audio" (numpy array) and "sampling_rate".
tts_pipeline = pipeline("text-to-speech", model="suno/bark-small")

# NOTE(review): "text-to-image" is not a transformers pipeline task — image
# generation with sdxl-turbo normally goes through diffusers
# (AutoPipelineForText2Image). Confirm this call actually constructs a pipeline.
image_generator = pipeline("text-to-image", model="stabilityai/sdxl-turbo")
|
|
|
|
|
@spaces.GPU
def generate_cartoon(script_text):
    """Expand a story into scenes, rendering a background image and a dialogue
    audio file for each one.

    Parameters
    ----------
    script_text : str
        Free-form cartoon story text supplied by the user.

    Returns
    -------
    list[tuple[str, str]] or str
        One ``(background_image_path, audio_path)`` pair per scene, or an
        error message string when the model output cannot be parsed as JSON.
    """
    # BUG FIX: the original prompt used unescaped inner triple quotes, which
    # terminated the f-string early — {script_text} was evaluated as a bare
    # set literal and the story text never reached the model.
    prompt = f"""
You are a cartoon script writer. Convert the following story into a detailed cartoon scene plan.
For each scene, provide:
1. Scene description (setting + action)
2. Dialogue
3. Characters involved
4. Background description

Story:
{script_text}

Return result in JSON format.
"""

    response = text_generator(prompt, max_new_tokens=1024)[0]['generated_text']

    # BUG FIX: generated_text echoes the prompt, so feeding it straight to
    # json.loads always failed; extract the JSON payload instead, and catch
    # only JSONDecodeError rather than a bare except.
    scene_data = _parse_scene_plan(response, prompt)
    if scene_data is None:
        return "Script expansion failed. Please refine input."

    os.makedirs("generated_images", exist_ok=True)
    os.makedirs("generated_audio", exist_ok=True)

    scene_results = []

    for idx, scene in enumerate(scene_data.get("scenes", [])):
        background_prompt = scene.get("background_description", "cartoon background")
        # NOTE(review): assumes a diffusers-style result exposing .images;
        # transformers has no "text-to-image" task — confirm this pipeline works.
        background_image = image_generator(background_prompt).images[0]
        bg_path = f"generated_images/scene_{idx + 1}.png"
        background_image.save(bg_path)

        dialogue = scene.get("dialogue", "")
        audio_output = tts_pipeline(dialogue)
        audio_path = f"generated_audio/scene_{idx + 1}.wav"
        # BUG FIX: the TTS pipeline returns {"audio": ndarray, "sampling_rate": int};
        # a numpy array has no .export() — write the WAV with scipy instead.
        scipy.io.wavfile.write(
            audio_path,
            audio_output["sampling_rate"],
            np.squeeze(audio_output["audio"]),
        )

        scene_results.append((bg_path, audio_path))

    return scene_results


def _parse_scene_plan(response, prompt):
    """Best-effort extraction of the JSON scene plan from raw model output.

    The text-generation pipeline echoes the prompt before the completion, and
    instruct models often wrap JSON in prose: strip the echoed prompt, then
    parse the outermost ``{...}`` span.  Returns the parsed object, or None
    on failure.
    """
    if response.startswith(prompt):
        response = response[len(prompt):]
    start = response.find("{")
    end = response.rfind("}")
    if start == -1 or end <= start:
        return None
    try:
        return json.loads(response[start:end + 1])
    except json.JSONDecodeError:
        return None
|
|
|
|
|
# Gradio UI: a single script textbox in, a gallery of generated scene assets out.
# NOTE(review): generate_cartoon returns (image_path, audio_path) tuples, so the
# Gallery will render the audio file path as the image caption rather than
# playing the audio — confirm this is the intended presentation.
demo = gr.Interface(
    fn=generate_cartoon,
    inputs=gr.Textbox(label="Enter Cartoon Script", lines=10),
    outputs=gr.Gallery(label="Generated Scenes with Audio"),
    title="Cartoon Scene Generator",
    description="Enter a cartoon story script to generate scenes with background images and audio. Video merging should be done separately in Google Colab."
)

# Launch only when run as a script (Spaces also imports this module).
if __name__ == "__main__":
    demo.launch()
|
|