Munaf1987 committed
Commit 14b3fec · verified · 1 Parent(s): b583b94

Update app.py

Files changed (1)
  1. app.py +74 -71
app.py CHANGED
@@ -1,84 +1,87 @@
  import gradio as gr
- import torch
- from diffusers import StableDiffusionImg2ImgPipeline
- from torchvision import transforms
  from PIL import Image
- import io
- import base64
  import spaces
- from functools import lru_cache
- 
- # Base64 utilities
- def pil_to_b64(img: Image.Image) -> str:
-     buf = io.BytesIO()
-     img.save(buf, format="PNG")
-     return base64.b64encode(buf.getvalue()).decode()
- 
- def b64_to_pil(b64: str) -> Image.Image:
-     return Image.open(io.BytesIO(base64.b64decode(b64))).convert("RGB")
- 
- # ✅ Cached Model Loaders (ZeroGPU Safe)
- @lru_cache(maxsize=2)
- def load_ghibli_model():
-     pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
-         "nitrosocke/Ghibli-Diffusion",
-         torch_dtype=torch.float16,
-         use_safetensors=True
-     ).to("cuda")
-     return pipe
- 
- @lru_cache(maxsize=2)
- def load_animegan_model():
-     model = torch.hub.load(
-         "bryandlee/animegan2-pytorch:main",
-         "generator",
-         pretrained="face_paint_512_v2"
-     ).to("cuda").eval()
-     return model
- 
- # ✅ Image Processing (Gradio Image Upload)
  @spaces.GPU
- def process_image(img: Image.Image, effect: str) -> Image.Image:
-     if effect == "ghibli":
-         pipe = load_ghibli_model()
-         out_img = pipe(prompt="ghibli style", image=img, strength=0.5, guidance_scale=7.5).images[0]
-     else:
-         animegan = load_animegan_model()
-         transform = transforms.Compose([
-             transforms.Resize((512, 512)),
-             transforms.ToTensor()
-         ])
-         img_tensor = transform(img).unsqueeze(0).to("cuda")
-         with torch.no_grad():
-             out = animegan(img_tensor)[0].clamp(0, 1).cpu()
-         out_img = transforms.ToPILImage()(out)
-     return out_img
- 
- # ✅ Base64 API Processing
- @spaces.GPU
- def process_base64(b64: str, effect: str) -> str:
-     img = b64_to_pil(b64)
-     out_img = process_image(img, effect)
-     return pil_to_b64(out_img)
- 
- # ✅ Gradio UI
- with gr.Blocks() as demo:
-     gr.Markdown("# 🎨 Ghibli & AnimeGAN Effects (ZeroGPU Compatible)")
- 
-     # Image Upload Tab
-     with gr.Tab("Web UI"):
-         img_input = gr.Image(type="pil", label="Upload Image")
-         effect_choice = gr.Radio(["ghibli", "anime"], label="Select Effect")
-         process_btn = gr.Button("Apply Effect")
-         img_output = gr.Image(label="Processed Image")
-         process_btn.click(process_image, [img_input, effect_choice], img_output)
- 
-     # Base64 API Tab
-     with gr.Tab("Base64 API"):
-         b64_input = gr.Textbox(label="Input Image (Base64)", lines=5)
-         effect_choice_b64 = gr.Radio(["ghibli", "anime"], label="Select Effect")
-         process_btn_b64 = gr.Button("Run API")
-         b64_output = gr.Textbox(label="Output Image (Base64)", lines=5)
-         process_btn_b64.click(process_base64, [b64_input, effect_choice_b64], b64_output)
- 
- demo.launch()
  import gradio as gr
+ import json
+ from transformers import pipeline
+ from moviepy.editor import ImageClip, AudioFileClip, concatenate_videoclips
  from PIL import Image
+ import numpy as np
+ import os
+ import torch
+ from diffusers import AutoPipelineForText2Image
+ from scipy.io import wavfile
  import spaces
+ # Text Expansion Model (use Mistral or LLaMA on ZeroGPU)
+ text_generator = pipeline("text-generation", model="mistralai/Mistral-7B-Instruct")
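+ # NOTE (assumption): on ZeroGPU Spaces, GPU kernels only run inside functions
+ # decorated with @spaces.GPU; this module-level code executes on CPU at startup.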

+ # Text-to-Speech Model (Bark small)
+ tts_pipeline = pipeline("text-to-speech", model="suno/bark-small")
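+ # The text-to-speech pipeline yields {"audio": np.ndarray, "sampling_rate": int}.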

+ # Image Generation Model (SDXL-Turbo). transformers has no "text-to-image"
+ # pipeline task, so this uses a diffusers pipeline instead.
+ image_generator = AutoPipelineForText2Image.from_pretrained(
+     "stabilityai/sdxl-turbo", torch_dtype=torch.float16, variant="fp16"
+ )
+ image_generator.to("cuda")  # ZeroGPU attaches the GPU when @spaces.GPU runs
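+ # NOTE (assumption): SDXL-Turbo is designed for 1-4 inference steps with
+ # guidance_scale=0.0; see the generation call below.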

+ # Main Processing Function
  @spaces.GPU
+ def generate_cartoon(script_text):
+     # Step 1: Expand Script
+     prompt = f"""
+ You are a cartoon script writer. Convert the following story into a detailed cartoon scene plan.
+ For each scene, provide:
+ 1. Scene description (setting + action)
+ 2. Dialogue
+ 3. Characters involved
+ 4. Background description
+ 
+ Story:
+ {script_text}
+ 
+ Return the result in JSON format.
+ """
+ 
+     # return_full_text=False keeps the prompt itself out of the model output
+     response = text_generator(prompt, max_new_tokens=1024, return_full_text=False)[0]["generated_text"]
+ 
+     try:
+         scene_data = json.loads(response)
+     except json.JSONDecodeError:
+         return "Script expansion failed. Please refine input."
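+     # NOTE (assumption): instruct models often wrap JSON in extra prose, so a
+     # stricter prompt or extracting the first {...} block would parse more reliably.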
+ 
+     characters = set()
+     backgrounds = set()
+     scene_clips = []
+ 
+     os.makedirs("generated_images", exist_ok=True)
+     os.makedirs("generated_audio", exist_ok=True)
+ 
+     for idx, scene in enumerate(scene_data.get("scenes", [])):
+         # Generate Background Image
+         background_prompt = scene.get("background_description", "cartoon background")
+         background_image = image_generator(
+             background_prompt, num_inference_steps=2, guidance_scale=0.0
+         ).images[0]
+         bg_path = f"generated_images/scene_{idx+1}.png"
+         background_image.save(bg_path)
+ 
+         # Generate TTS Audio
+         dialogue = scene.get("dialogue", "")
+         audio_output = tts_pipeline(dialogue)
+         audio_path = f"generated_audio/scene_{idx+1}.wav"
+         # The audio is a raw numpy array (no .export()), so write it with scipy
+         wavfile.write(audio_path, audio_output["sampling_rate"], audio_output["audio"].squeeze())
+ 
+         # Create Scene Video Clip
+         image_clip = ImageClip(bg_path).set_duration(5).fadein(1).fadeout(1)
+         # set_audio() expects an AudioClip, not a file path
+         image_clip = image_clip.set_audio(AudioFileClip(audio_path))
+         scene_clips.append(image_clip)
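+         # NOTE (assumption): a fixed 5 s per scene can cut dialogue short;
+         # set_duration(AudioFileClip(audio_path).duration) would sync to speech.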
+ 
+     # Merge Scenes
+     final_video = concatenate_videoclips(scene_clips)
+     final_video_path = "final_cartoon_video.mp4"
+     # write_videofile needs ffmpeg (bundled with moviepy via imageio-ffmpeg)
+     final_video.write_videofile(final_video_path, fps=24)
+ 
+     return final_video_path
+ 
+ # Gradio UI
+ demo = gr.Interface(
+     fn=generate_cartoon,
+     inputs=gr.Textbox(label="Enter Cartoon Script", lines=10),
+     outputs=gr.Video(label="Generated Cartoon Video"),
+     title="Cartoon Video Generator",
+     description="Enter a short cartoon story script and automatically generate a cartoon video (7-10 mins)."
+ )
+ 
+ if __name__ == "__main__":
+     demo.launch()