webtoon-gen

Running on Zero

App Files Files Community

webtoon-gen / app.py

openfree

Update app.py

43a7c4d verified about 2 months ago

raw

history blame

15 kB

	import os
	import gc
	import uuid
	import random
	import tempfile
	import time
	from datetime import datetime
	from typing import Any
	from huggingface_hub import login, hf_hub_download
	import spaces

	import gradio as gr
	import numpy as np
	import torch
	from PIL import Image, ImageDraw, ImageFont
	from diffusers import FluxPipeline
	from transformers import pipeline

	# 메모리 정리 함수
	def clear_memory():
	gc.collect()
	try:
	if torch.cuda.is_available():
	with torch.cuda.device(0):
	torch.cuda.empty_cache()
	except:
	pass

	# GPU 설정
	device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

	if torch.cuda.is_available():
	try:
	with torch.cuda.device(0):
	torch.cuda.empty_cache()
	torch.backends.cudnn.benchmark = True
	torch.backends.cuda.matmul.allow_tf32 = True
	except:
	print("Warning: Could not configure CUDA settings")

	# HF 토큰 설정
	HF_TOKEN = os.getenv("HF_TOKEN")
	if HF_TOKEN is None:
	raise ValueError("Please set the HF_TOKEN environment variable")

	try:
	login(token=HF_TOKEN)
	except Exception as e:
	raise ValueError(f"Failed to login to Hugging Face: {str(e)}")



	translator = pipeline("translation", model="Helsinki-NLP/opus-mt-ko-en", device=-1) # CPU에서 실행

	def translate_to_english(text: str) -> str:
	"""한글 텍스트를 영어로 번역"""
	try:
	if any(ord('가') <= ord(char) <= ord('힣') for char in text):
	translated = translator(text, max_length=128)[0]['translation_text']
	print(f"Translated '{text}' to '{translated}'")
	return translated
	return text
	except Exception as e:
	print(f"Translation error: {str(e)}")
	return text


	# FLUX 파이프라인 초기화 부분 수정
	print("Initializing FLUX pipeline...")
	try:
	pipe = FluxPipeline.from_pretrained(
	"black-forest-labs/FLUX.1-dev",
	torch_dtype=torch.float16,
	use_auth_token=HF_TOKEN
	)
	print("FLUX pipeline initialized successfully")

	# 메모리 최적화 설정
	pipe.enable_attention_slicing(slice_size=1)

	# GPU 설정
	if torch.cuda.is_available():
	pipe = pipe.to("cuda:0")
	torch.cuda.empty_cache()
	torch.backends.cudnn.benchmark = True
	torch.backends.cuda.matmul.allow_tf32 = True

	print("Pipeline optimization settings applied")

	except Exception as e:
	print(f"Error initializing FLUX pipeline: {str(e)}")
	raise

	# LoRA 가중치 로드 부분 수정
	print("Loading LoRA weights...")
	try:
	# 로컬 LoRA 파일의 절대 경로 확인
	current_dir = os.path.dirname(os.path.abspath(__file__))
	lora_path = os.path.join(current_dir, "myt-flux-fantasy.safetensors")

	if not os.path.exists(lora_path):
	raise FileNotFoundError(f"LoRA file not found at: {lora_path}")

	print(f"Loading LoRA weights from: {lora_path}")

	# LoRA 가중치 로드
	pipe.load_lora_weights(lora_path)
	pipe.fuse_lora(lora_scale=0.75) # lora_scale 값 조정

	# 메모리 정리
	torch.cuda.empty_cache()
	gc.collect()

	print("LoRA weights loaded and fused successfully")
	print(f"Current device: {pipe.device}")

	except Exception as e:
	print(f"Error loading LoRA weights: {str(e)}")
	print(f"Full error details: {repr(e)}")
	raise ValueError(f"Failed to load LoRA weights: {str(e)}")


	@spaces.GPU(duration=60)
	def generate_image(
	prompt: str,
	seed: int,
	randomize_seed: bool,
	width: int,
	height: int,
	guidance_scale: float,
	num_inference_steps: int,
	progress: gr.Progress = gr.Progress()
	):
	try:
	clear_memory()

	translated_prompt = translate_to_english(prompt)
	print(f"Processing prompt: {translated_prompt}")

	if randomize_seed:
	seed = random.randint(0, MAX_SEED)

	generator = torch.Generator(device=device).manual_seed(seed)

	print(f"Current device: {pipe.device}")
	print(f"Starting image generation...")

	with torch.inference_mode(), torch.cuda.amp.autocast(enabled=True):
	image = pipe(
	prompt=translated_prompt,
	width=width,
	height=height,
	num_inference_steps=num_inference_steps,
	guidance_scale=guidance_scale,
	generator=generator,
	num_images_per_prompt=1,
	).images[0]

	filepath = save_generated_image(image, translated_prompt)
	print(f"Image generated and saved to: {filepath}")
	return image, seed

	except Exception as e:
	print(f"Generation error: {str(e)}")
	print(f"Full error details: {repr(e)}")
	raise gr.Error(f"Image generation failed: {str(e)}")
	finally:
	clear_memory()

	# 저장 디렉토리 설정
	SAVE_DIR = "saved_images"
	if not os.path.exists(SAVE_DIR):
	os.makedirs(SAVE_DIR, exist_ok=True)

	MAX_SEED = np.iinfo(np.int32).max
	MAX_IMAGE_SIZE = 1024

	def save_generated_image(image, prompt):
	timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
	unique_id = str(uuid.uuid4())[:8]
	filename = f"{timestamp}_{unique_id}.png"
	filepath = os.path.join(SAVE_DIR, filename)
	image.save(filepath)
	return filepath



	def add_text_with_stroke(draw, text, x, y, font, text_color, stroke_width):
	"""텍스트에 외곽선을 추가하는 함수"""
	for adj_x in range(-stroke_width, stroke_width + 1):
	for adj_y in range(-stroke_width, stroke_width + 1):
	draw.text((x + adj_x, y + adj_y), text, font=font, fill=text_color)

	def add_text_to_image(
	input_image,
	text,
	font_size,
	color,
	opacity,
	x_position,
	y_position,
	thickness,
	text_position_type,
	font_choice
	):
	try:
	if input_image is None or text.strip() == "":
	return input_image

	if not isinstance(input_image, Image.Image):
	if isinstance(input_image, np.ndarray):
	image = Image.fromarray(input_image)
	else:
	raise ValueError("Unsupported image type")
	else:
	image = input_image.copy()

	if image.mode != 'RGBA':
	image = image.convert('RGBA')

	font_files = {
	"Default": "DejaVuSans.ttf",
	"Korean Regular": "ko-Regular.ttf"
	}

	try:
	font_file = font_files.get(font_choice, "DejaVuSans.ttf")
	font = ImageFont.truetype(font_file, int(font_size))
	except Exception as e:
	print(f"Font loading error ({font_choice}): {str(e)}")
	font = ImageFont.load_default()

	color_map = {
	'White': (255, 255, 255),
	'Black': (0, 0, 0),
	'Red': (255, 0, 0),
	'Green': (0, 255, 0),
	'Blue': (0, 0, 255),
	'Yellow': (255, 255, 0),
	'Purple': (128, 0, 128)
	}
	rgb_color = color_map.get(color, (255, 255, 255))

	temp_draw = ImageDraw.Draw(image)
	text_bbox = temp_draw.textbbox((0, 0), text, font=font)
	text_width = text_bbox[2] - text_bbox[0]
	text_height = text_bbox[3] - text_bbox[1]

	actual_x = int((image.width - text_width) * (x_position / 100))
	actual_y = int((image.height - text_height) * (y_position / 100))

	text_color = (*rgb_color, int(opacity))

	txt_overlay = Image.new('RGBA', image.size, (255, 255, 255, 0))
	draw = ImageDraw.Draw(txt_overlay)

	add_text_with_stroke(
	draw,
	text,
	actual_x,
	actual_y,
	font,
	text_color,
	int(thickness)
	)
	output_image = Image.alpha_composite(image, txt_overlay)

	output_image = output_image.convert('RGB')

	return output_image

	except Exception as e:
	print(f"Error in add_text_to_image: {str(e)}")
	return input_image


	css = """
	footer {display: none}
	.main-title {
	text-align: center;
	margin: 1em 0;
	padding: 1.5em;
	background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
	border-radius: 15px;
	box-shadow: 0 4px 6px rgba(0,0,0,0.1);
	}
	.main-title h1 {
	color: #2196F3;
	font-size: 2.8em;
	margin-bottom: 0.3em;
	font-weight: 700;
	}
	.main-title p {
	color: #555;
	font-size: 1.3em;
	line-height: 1.4;
	}
	.container {
	max-width: 1200px;
	margin: auto;
	padding: 20px;
	}
	.input-panel, .output-panel {
	background: white;
	padding: 1.5em;
	border-radius: 12px;
	box-shadow: 0 2px 8px rgba(0,0,0,0.08);
	margin-bottom: 1em;
	}
	"""

	import requests

	def enhance_prompt(prompt: str) -> str:
	"""프롬프트를 애니메이션 스타일로 증강"""
	try:
	# 기본 품질 향상 프롬프트 추가
	enhancements = [
	"masterpiece, best quality, highly detailed",
	"anime style, animation style",
	"vibrant colors, perfect lighting",
	"professional composition",
	"dynamic pose, expressive features",
	"detailed background, perfect shadows",
	"[trigger]"
	]

	# 애니메이션 스타일 프롬프트 변환
	anime_style_prompt = f"an animated {prompt}, detailed anime art style"

	# 최종 프롬프트 구성
	final_prompt = f"{anime_style_prompt}, {', '.join(enhancements)}"
	print(f"Enhanced prompt: {final_prompt}")

	return final_prompt
	except Exception as e:
	print(f"Prompt enhancement failed: {str(e)}")
	return prompt

	# 기존의 pipeline 초기화 부분 제거
	# try:
	# prompt_enhancer = pipeline(...)
	# except Exception as e:
	# print(f"Error initializing prompt enhancer: {str(e)}")
	# prompt_enhancer = None


	with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
	gr.HTML("""
	<div class="main-title">
	<h1>🎨 Webtoon Studio</h1>
	<p>Generate webtoon-style images and add text with various styles and positions.</p>
	</div>
	""")

	with gr.Row():
	with gr.Column(scale=1):
	gen_prompt = gr.Textbox(
	label="Generation Prompt",
	placeholder="Enter your image generation prompt..."
	)
	enhance_btn = gr.Button("✨ Enhance Prompt", variant="secondary")

	with gr.Row():
	gen_width = gr.Slider(512, 1024, 768, step=64, label="Width")
	gen_height = gr.Slider(512, 1024, 768, step=64, label="Height")

	with gr.Row():
	guidance_scale = gr.Slider(1, 20, 7.5, step=0.5, label="Guidance Scale")
	num_steps = gr.Slider(1, 50, 30, step=1, label="Number of Steps")

	with gr.Row():
	seed = gr.Number(label="Seed", value=-1)
	randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)

	generate_btn = gr.Button("Generate Image", variant="primary")

	output_image = gr.Image(
	label="Generated Image",
	type="pil",
	show_download_button=True
	)
	output_seed = gr.Number(label="Used Seed", interactive=False)

	# 텍스트 추가 섹션
	with gr.Accordion("Text Options", open=False):
	text_input = gr.Textbox(
	label="Text Content",
	placeholder="Enter text to add..."
	)
	text_position_type = gr.Radio(
	choices=["Text Over Image"],
	value="Text Over Image",
	label="Text Position",
	visible=True
	)
	with gr.Row():
	font_choice = gr.Dropdown(
	choices=["Default", "Korean Regular"],
	value="Default",
	label="Font Selection",
	interactive=True
	)
	font_size = gr.Slider(
	minimum=10,
	maximum=200,
	value=40,
	step=5,
	label="Font Size"
	)
	with gr.Row():
	color_dropdown = gr.Dropdown(
	choices=["White", "Black", "Red", "Green", "Blue", "Yellow", "Purple"],
	value="White",
	label="Text Color"
	)
	thickness = gr.Slider(
	minimum=0,
	maximum=10,
	value=1,
	step=1,
	label="Text Thickness"
	)
	with gr.Row():
	opacity_slider = gr.Slider(
	minimum=0,
	maximum=255,
	value=255,
	step=1,
	label="Opacity"
	)
	with gr.Row():
	x_position = gr.Slider(
	minimum=0,
	maximum=100,
	value=50,
	step=1,
	label="Left(0%)~Right(100%)"
	)
	y_position = gr.Slider(
	minimum=0,
	maximum=100,
	value=50,
	step=1,
	label="High(0%)~Low(100%)"
	)
	add_text_btn = gr.Button("Apply Text", variant="primary")

	# 이벤트 바인딩
	generate_btn.click(
	fn=generate_image,
	inputs=[
	gen_prompt,
	seed,
	randomize_seed,
	gen_width,
	gen_height,
	guidance_scale,
	num_steps,
	],
	outputs=[output_image, output_seed]
	)

	add_text_btn.click(
	fn=add_text_to_image,
	inputs=[
	output_image,
	text_input,
	font_size,
	color_dropdown,
	opacity_slider,
	x_position,
	y_position,
	thickness,
	text_position_type,
	font_choice
	],
	outputs=output_image
	)

	# 이벤트 바인딩 추가
	def update_prompt(prompt):
	enhanced = enhance_prompt(prompt)
	return enhanced

	enhance_btn.click(
	fn=update_prompt,
	inputs=[gen_prompt],
	outputs=[gen_prompt]
	)

	demo.queue(max_size=5)
	demo.launch(
	server_name="0.0.0.0",
	server_port=7860,
	share=False,
	max_threads=2
	)