Update app.py
Browse files
app.py
CHANGED
@@ -1,84 +1,87 @@
|
|
1 |
import gradio as gr
|
2 |
-
import
|
3 |
-
from
|
4 |
-
from
|
5 |
from PIL import Image
|
6 |
-
import
|
7 |
-
import
|
8 |
import spaces
|
9 |
-
from functools import lru_cache
|
10 |
|
11 |
-
#
|
12 |
-
|
13 |
-
buf = io.BytesIO()
|
14 |
-
img.save(buf, format="PNG")
|
15 |
-
return base64.b64encode(buf.getvalue()).decode()
|
16 |
|
17 |
-
|
18 |
-
|
19 |
|
20 |
-
#
|
21 |
-
|
22 |
-
def load_ghibli_model():
|
23 |
-
pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
|
24 |
-
"nitrosocke/Ghibli-Diffusion",
|
25 |
-
torch_dtype=torch.float16,
|
26 |
-
use_safetensors=True
|
27 |
-
).to("cuda")
|
28 |
-
return pipe
|
29 |
|
30 |
-
|
31 |
-
def load_animegan_model():
|
32 |
-
model = torch.hub.load(
|
33 |
-
"bryandlee/animegan2-pytorch:main",
|
34 |
-
"generator",
|
35 |
-
pretrained="face_paint_512_v2"
|
36 |
-
).to("cuda").eval()
|
37 |
-
return model
|
38 |
-
|
39 |
-
# β
Image Processing (Gradio Image Upload)
|
40 |
@spaces.GPU
|
41 |
-
def
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
])
|
51 |
-
img_tensor = transform(img).unsqueeze(0).to("cuda")
|
52 |
-
with torch.no_grad():
|
53 |
-
out = animegan(img_tensor)[0].clamp(0, 1).cpu()
|
54 |
-
out_img = transforms.ToPILImage()(out)
|
55 |
-
return out_img
|
56 |
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
63 |
|
64 |
-
#
|
65 |
-
|
66 |
-
|
|
|
67 |
|
68 |
-
|
69 |
-
with gr.Tab("Web UI"):
|
70 |
-
img_input = gr.Image(type="pil", label="Upload Image")
|
71 |
-
effect_choice = gr.Radio(["ghibli", "anime"], label="Select Effect")
|
72 |
-
process_btn = gr.Button("Apply Effect")
|
73 |
-
img_output = gr.Image(label="Processed Image")
|
74 |
-
process_btn.click(process_image, [img_input, effect_choice], img_output)
|
75 |
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
|
|
83 |
|
84 |
-
|
|
|
|
1 |
import json
import os
import wave

import gradio as gr
import numpy as np
import spaces
from moviepy.editor import AudioFileClip, ImageClip, concatenate_videoclips
from PIL import Image
from transformers import pipeline
|
|
|
9 |
|
10 |
+
# Text expansion model used to turn the user's story into a scene plan.
# The Hub repositories for Mistral 7B Instruct are versioned ("-v0.1",
# "-v0.2", ...); the unversioned id does not resolve to a model.
text_generator = pipeline(
    "text-generation",
    model="mistralai/Mistral-7B-Instruct-v0.2",
)

# Text-to-speech model (Bark small), used to voice each scene's dialogue.
tts_pipeline = pipeline("text-to-speech", model="suno/bark-small")

# Image generation model (SDXL Turbo), used for scene backgrounds.
# NOTE(review): "text-to-image" does not appear to be a task supported by
# transformers.pipeline, and generate_cartoon reads `.images[0]` from the
# result, which matches the diffusers pipeline output instead. Confirm and
# switch to a diffusers pipeline if this line fails at start-up.
image_generator = pipeline("text-to-image", model="stabilityai/sdxl-turbo")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
+
# Main Processing Function
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
_SCENE_SECONDS = 5  # minimum time each scene stays on screen
_FADE_SECONDS = 1  # fade-in / fade-out length applied to every scene
_EXPANSION_FAILED = "Script expansion failed. Please refine input."


def _build_scene_prompt(script_text):
    """Return the prompt asking the language model for a JSON scene plan."""
    # Built from plain string pieces: a triple-quoted f-string cannot contain
    # the literal triple quotes that delimit the story.
    return (
        "You are a cartoon script writer. Convert the following story into a "
        "detailed cartoon scene plan.\n"
        "For each scene, provide:\n"
        "1. Scene description (setting + action)\n"
        "2. Dialogue\n"
        "3. Characters involved\n"
        "4. Background description\n"
        "\n"
        "Story:\n"
        '"""\n'
        f"{script_text}\n"
        '"""\n'
        # Name the keys explicitly: the scene loop reads "scenes",
        # "background_description" and "dialogue".
        'Return result in JSON format: an object with a "scenes" list whose '
        'items have the keys "scene_description", "dialogue", "characters" '
        'and "background_description".\n'
    )


def _parse_scene_plan(response):
    """Return the list of scene dicts contained in the model *response*.

    Accepts either a JSON object with a "scenes" list or a bare JSON list.
    If the whole text is not valid JSON, the span from the first "{" to the
    last "}" is tried, so surrounding prose or code fences are tolerated.

    Raises:
        ValueError: if no JSON scene list can be extracted.
    """
    text = response.strip()
    try:
        plan = json.loads(text)
    except json.JSONDecodeError:
        start, end = text.find("{"), text.rfind("}")
        if start == -1 or end <= start:
            raise
        plan = json.loads(text[start:end + 1])

    scenes = plan.get("scenes", []) if isinstance(plan, dict) else plan
    if not isinstance(scenes, list):
        raise ValueError("scene plan does not contain a list of scenes")
    return [scene for scene in scenes if isinstance(scene, dict)]


def _dialogue_text(dialogue):
    """Flatten a scene's dialogue value into one string for the TTS model."""
    if dialogue is None:
        return ""
    if isinstance(dialogue, (list, tuple)):
        return " ".join(str(line) for line in dialogue).strip()
    return str(dialogue).strip()


def _write_wav(path, audio, sampling_rate):
    """Write *audio* to *path* as a 16-bit mono PCM WAV file.

    Assumes *audio* holds float samples in [-1, 1] for a single channel
    (any leading batch/channel axis of size 1 is flattened) — TODO confirm
    against the TTS model in use.
    """
    samples = np.asarray(audio, dtype=np.float32).reshape(-1)
    pcm = (np.clip(samples, -1.0, 1.0) * 32767).astype("<i2")
    with wave.open(path, "wb") as wav_file:
        wav_file.setnchannels(1)
        wav_file.setsampwidth(2)
        wav_file.setframerate(int(sampling_rate))
        wav_file.writeframes(pcm.tobytes())


@spaces.GPU
def generate_cartoon(script_text):
    """Turn a story script into a narrated slideshow video.

    The story is expanded into a JSON scene plan by ``text_generator``; for
    each scene a background image is generated, the dialogue is voiced by
    ``tts_pipeline``, and the resulting clips are concatenated into one MP4.

    Args:
        script_text: The story entered by the user.

    Returns:
        Path of the rendered video file ("final_cartoon_video.mp4").

    Raises:
        gr.Error: if the input is empty, the model output cannot be parsed
            as a scene plan, or the plan contains no scenes. Raising (rather
            than returning a message) lets Gradio show the text to the user
            instead of treating it as a video path.
    """
    if not script_text or not script_text.strip():
        raise gr.Error(_EXPANSION_FAILED)

    # Step 1: expand the script. return_full_text=False keeps the prompt out
    # of the generated text so only the model's answer is parsed.
    response = text_generator(
        _build_scene_prompt(script_text),
        max_new_tokens=1024,
        return_full_text=False,
    )[0]["generated_text"]

    try:
        scenes = _parse_scene_plan(response)
    except ValueError as err:
        raise gr.Error(_EXPANSION_FAILED) from err
    if not scenes:
        # concatenate_videoclips cannot build a video from zero clips.
        raise gr.Error(_EXPANSION_FAILED)

    os.makedirs("generated_images", exist_ok=True)
    os.makedirs("generated_audio", exist_ok=True)

    scene_clips = []
    narrations = []  # audio readers, closed once the video is written
    try:
        for idx, scene in enumerate(scenes, start=1):
            # Step 2: background image.
            background_prompt = (
                scene.get("background_description") or "cartoon background"
            )
            background_image = image_generator(background_prompt).images[0]
            bg_path = f"generated_images/scene_{idx}.png"
            background_image.save(bg_path)

            # Step 3: narration. Scenes without dialogue stay silent.
            narration = None
            dialogue = _dialogue_text(scene.get("dialogue", ""))
            if dialogue:
                # The TTS pipeline is expected to return a dict holding the
                # waveform array and its sampling rate.
                audio_output = tts_pipeline(dialogue)
                audio_path = f"generated_audio/scene_{idx}.wav"
                _write_wav(
                    audio_path,
                    audio_output["audio"],
                    audio_output["sampling_rate"],
                )
                narration = AudioFileClip(audio_path)
                narrations.append(narration)

            # Step 4: scene clip. Keep the scene up long enough for the
            # whole narration so speech is not cut off.
            duration = _SCENE_SECONDS
            if narration is not None:
                duration = max(duration, narration.duration)
            image_clip = (
                ImageClip(bg_path)
                .set_duration(duration)
                .fadein(_FADE_SECONDS)
                .fadeout(_FADE_SECONDS)
            )
            if narration is not None:
                image_clip = image_clip.set_audio(narration)
            scene_clips.append(image_clip)

        # Step 5: merge scenes and render.
        final_video = concatenate_videoclips(scene_clips)
        final_video_path = "final_cartoon_video.mp4"
        final_video.write_videofile(final_video_path, fps=24)
    finally:
        for narration in narrations:
            narration.close()

    return final_video_path
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
|
77 |
+
# Gradio UI: one multi-line text box in, the rendered video out.
script_box = gr.Textbox(label="Enter Cartoon Script", lines=10)
video_player = gr.Video(label="Generated Cartoon Video")

demo = gr.Interface(
    fn=generate_cartoon,
    inputs=script_box,
    outputs=video_player,
    title="Cartoon Video Generator",
    description=(
        "Enter a short cartoon story script and generate a cartoon video (7-10 mins) automatically."
    ),
)

# Start the web server only when the file is run as a script.
if __name__ == "__main__":
    demo.launch()
|