Every-Text

Runtime error

App Files Files Community

Every-Text / app.py

ginipick

Update app.py

819fc44 verified 8 months ago

raw

history blame

10.4 kB

	import os
	import time
	from os import path
	import tempfile
	import uuid
	import base64
	import mimetypes
	import json
	import io
	import random
	import string

	import torch
	from PIL import Image

	from safetensors.torch import load_file
	from huggingface_hub import hf_hub_download

	# Diffusers 관련 라이브러리
	import gradio as gr
	from diffusers import FluxPipeline

	# Google GenAI 라이브러리
	from google import genai
	from google.genai import types

	#######################################
	# 0. 환경설정
	#######################################

	# Resolve the directory this script lives in; fall back to the current
	# working directory when __file__ is not defined.
	if "__file__" in globals():
	    BASE_DIR = path.dirname(path.abspath(__file__))
	else:
	    BASE_DIR = os.getcwd()

	# All model downloads are meant to live under <BASE_DIR>/models.
	CACHE_PATH = path.join(BASE_DIR, "models")

	# Point each Hugging Face cache variable at that same directory.
	os.environ.update(
	    {
	        "TRANSFORMERS_CACHE": CACHE_PATH,
	        "HF_HUB_CACHE": CACHE_PATH,
	        "HF_HOME": CACHE_PATH,
	    }
	)

	# 타이머 클래스
	class timer:
	    """Context manager that prints how long the wrapped block took.

	    Prints "<name> starts" on entry and "<name> took <seconds>s" on exit,
	    with the elapsed time rounded to two decimals. Exceptions raised inside
	    the block are not suppressed.

	    Args:
	        method_name: Label used in both printed messages.
	    """

	    def __init__(self, method_name="timed process"):
	        self.method = method_name
	        # Set on entry; defined here so the attribute always exists.
	        self.start = None

	    def __enter__(self):
	        # perf_counter is monotonic, so the measured interval is not
	        # affected by system clock adjustments.
	        self.start = time.perf_counter()
	        print(f"{self.method} starts")
	        # Return the instance so `with timer(...) as t:` binds the timer
	        # instead of None.
	        return self

	    def __exit__(self, exc_type, exc_val, exc_tb):
	        elapsed = time.perf_counter() - self.start
	        print(f"{self.method} took {round(elapsed, 2)}s")

	#######################################
	# 1. FLUX 파이프라인 로드
	#######################################

	# Create the local model cache directory; exist_ok makes this a no-op when
	# the directory is already there, so no separate existence check is needed.
	os.makedirs(CACHE_PATH, exist_ok=True)

	# cache_dir is passed explicitly: the HF_* environment variables are assigned
	# after huggingface_hub / diffusers were imported, so they may already have
	# been read and would then not redirect these downloads to CACHE_PATH.
	pipe = FluxPipeline.from_pretrained(
	    "black-forest-labs/FLUX.1-dev",
	    torch_dtype=torch.bfloat16,
	    cache_dir=CACHE_PATH,
	)

	# Fetch the Hyper-SD 8-step LoRA and fuse it into the pipeline weights.
	lora_path = hf_hub_download(
	    "ByteDance/Hyper-SD",
	    "Hyper-FLUX.1-dev-8steps-lora.safetensors",
	    cache_dir=CACHE_PATH,
	)
	pipe.load_lora_weights(lora_path)
	pipe.fuse_lora(lora_scale=0.125)

	# Move the pipeline to the GPU in bfloat16 (requires a CUDA device).
	pipe.to(device="cuda", dtype=torch.bfloat16)

	#######################################
	# 2. Google GenAI 모델로 텍스트 변환 함수
	#######################################

	def save_binary_file(file_name, data):
	    """Write the raw bytes ``data`` to ``file_name``, replacing any existing file.

	    Used to store the binary image payload received from Google GenAI.
	    """
	    with open(file_name, mode="wb") as sink:
	        sink.write(data)

	def generate_by_google_genai(text, file_name, model="gemini-2.0-flash-exp"):
	    """Send an image plus a text instruction to a Gemini model and collect the reply.

	    Args:
	        text: Prompt or instruction sent together with the image.
	        file_name: Path of the source image (e.g. a .png) to upload.
	        model: Name of the Gemini model to call.

	    Returns:
	        A tuple ``(image_path, text_response)``. ``image_path`` is the path
	        of a temporary .png holding the first image found in the stream, or
	        None when the model returned no image; the caller owns that file.
	        ``text_response`` is the text received before the image, with a
	        newline appended after each streamed chunk.

	    Raises:
	        ValueError: If the GAPI_TOKEN environment variable is not set.
	    """
	    api_key = os.getenv("GAPI_TOKEN")
	    if not api_key:
	        raise ValueError(
	            "GAPI_TOKEN 환경 변수가 설정되지 않았습니다. "
	            "Google GenAI API 사용을 위해서는 GAPI_TOKEN이 필요합니다."
	        )

	    client = genai.Client(api_key=api_key)
	    uploaded = client.files.upload(file=file_name)

	    contents = [
	        types.Content(
	            role="user",
	            parts=[
	                types.Part.from_uri(
	                    file_uri=uploaded.uri,
	                    mime_type=uploaded.mime_type,
	                ),
	                types.Part.from_text(text=text),
	            ],
	        ),
	    ]

	    generate_content_config = types.GenerateContentConfig(
	        temperature=1,
	        top_p=0.95,
	        top_k=40,
	        max_output_tokens=8192,
	        response_modalities=["image", "text"],
	        response_mime_type="text/plain",
	    )

	    # Reserve a temp path and close the handle right away, so the file is
	    # not held open while save_binary_file reopens it for writing.
	    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
	        temp_path = tmp.name

	    text_chunks = []
	    image_path = None
	    try:
	        for chunk in client.models.generate_content_stream(
	            model=model,
	            contents=contents,
	            config=generate_content_config,
	        ):
	            candidates = chunk.candidates
	            if not candidates or not candidates[0].content or not candidates[0].content.parts:
	                continue

	            # Look at every part of the chunk, not only the first one, so an
	            # image that follows a text part is not missed.
	            parts = candidates[0].content.parts
	            image_part = next((part for part in parts if part.inline_data), None)

	            if image_part is not None:
	                save_binary_file(temp_path, image_part.inline_data.data)
	                print(f"File of mime type {image_part.inline_data.mime_type} saved to: {temp_path}")
	                image_path = temp_path
	                break

	            # chunk.text can be None when the chunk carries no text parts.
	            chunk_text = chunk.text
	            if chunk_text is not None:
	                text_chunks.append(chunk_text + "\n")
	    finally:
	        # No image was written (or the stream failed): do not leave the
	        # empty placeholder file behind.
	        if image_path is None:
	            try:
	                os.remove(temp_path)
	            except OSError:
	                pass

	    return image_path, "".join(text_chunks)

	#######################################
	# 3. Diffusion + GoogleGenAI를 연결
	#######################################

	def generate_initial_image(prompt, text, height, width, steps, scale, seed):
	    """Render an image with the FLUX pipeline using a prompt that embeds ``text``.

	    A ``<text>`` placeholder in ``prompt`` is replaced by ``text``; when the
	    placeholder is absent, a clause asking for that text is appended to the
	    prompt instead. The numeric arguments are cast to int/float before being
	    handed to the pipeline.

	    Returns:
	        The first image of the pipeline output.
	    """
	    placeholder = "<text>"
	    combined_prompt = (
	        prompt.replace(placeholder, text)
	        if placeholder in prompt
	        else f"{prompt} with clear readable text that says '{text}'"
	    )

	    print("[DEBUG] combined_prompt:", combined_prompt)

	    with torch.inference_mode():
	        with torch.autocast("cuda", dtype=torch.bfloat16):
	            with timer("GenerateInitialImage"):
	                # Seeded generator so the same seed reproduces the same image.
	                rng = torch.Generator().manual_seed(int(seed))
	                output = pipe(
	                    prompt=[combined_prompt],
	                    generator=rng,
	                    num_inference_steps=int(steps),
	                    guidance_scale=float(scale),
	                    height=int(height),
	                    width=int(width),
	                    max_sequence_length=256,
	                )
	                return output.images[0]

	def change_text_in_image(original_image, new_text):
	    """Ask the Gemini model to replace the text in ``original_image`` with ``new_text``.

	    The image is written to a temporary .png, sent through
	    generate_by_google_genai, and the returned image file is loaded fully
	    into memory. Both temporary files are removed before returning.

	    Args:
	        original_image: Image object exposing ``save(path)`` (a PIL image here).
	        new_text: Text that should appear in the edited image.

	    Returns:
	        The edited image, or None when the model answered with text only.

	    Raises:
	        gr.Error: Wraps any exception raised along the way.
	    """
	    original_path = None
	    image_path = None
	    try:
	        # Only reserve the path here; saving after the handle is closed
	        # avoids writing to a file that is still open elsewhere.
	        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
	            original_path = tmp.name
	        original_image.save(original_path)

	        image_path, _text_response = generate_by_google_genai(
	            text=f"Change the text in this image to: '{new_text}'",
	            file_name=original_path
	        )

	        if not image_path:
	            # The model replied with text only, no image.
	            return None

	        # Read the bytes up front so the image no longer depends on the
	        # temporary file, which is deleted below.
	        with open(image_path, "rb") as f:
	            image_data = f.read()
	        return Image.open(io.BytesIO(image_data))

	    except Exception as e:
	        # Keep the original exception chained for server-side tracebacks.
	        raise gr.Error(f"Error: {e}") from e

	    finally:
	        # Best-effort cleanup of both temporary files.
	        for leftover in (original_path, image_path):
	            if leftover and path.exists(leftover):
	                try:
	                    os.remove(leftover)
	                except OSError:
	                    pass

	#######################################
	# 4. 임의 알파벳 생성
	#######################################

	def generate_random_letters(length: int) -> str:
	    """Return a string of ``length`` random ASCII letters, lower- and uppercase mixed."""
	    # ascii_letters is exactly ascii_lowercase followed by ascii_uppercase.
	    alphabet = string.ascii_letters
	    picks = [random.choice(alphabet) for _ in range(length)]
	    return "".join(picks)

	#######################################
	# 5. 최종 함수: 버튼 한 번으로
	# (1) 무작위 알파벳으로 1차 이미지 생성
	# (2) 진짜 "새로 바꿀 텍스트"로 2차 이미지 생성
	#######################################

	def run_full_process(prompt, final_text, height, width, steps, scale, seed):
	    """Run both stages for one button click and return the two images.

	    Stage 1 renders an image containing random letters, as many as there are
	    characters in ``final_text``. Stage 2 asks for that text to be replaced
	    by ``final_text``.

	    Returns:
	        ``[first_stage_image, second_stage_image]``.
	    """
	    # (A) Placeholder letters, same length as the requested final text.
	    placeholder_text = generate_random_letters(len(final_text))
	    print(f"[STEP] final_text='{final_text}' => random_text='{placeholder_text}'")

	    # (B) First pass: image rendered with the placeholder letters.
	    first_pass = generate_initial_image(
	        prompt, placeholder_text, height, width, steps, scale, seed
	    )

	    # (C) Second pass: swap the placeholder for the real text.
	    second_pass = change_text_in_image(first_pass, final_text)

	    return [first_pass, second_pass]

	#######################################
	# 6. Gradio UI
	#######################################

	with gr.Blocks(title="Flux + Google GenAI (Random & Then Real Text)") as demo:
	    # Usage notes shown at the top of the page.
	    gr.Markdown(
	        """
	# Flux + Google GenAI: 두 단계에 걸친 텍스트 교체

	사용 흐름
	1) Prompt에 장면이나 스타일을 작성 (필요하면 `<text>` 구분자 사용)
	2) "새로 바꿀 텍스트" 에 최종 원하는 문자열을 입력 (예: "안녕하세요")
	3) "Generate Images" 버튼을 누르면,
	- 먼저 "새로 바꿀 텍스트" 길이에 맞는 무작위 알파벳을 넣어 이미지 생성 (1차 이미지)
	- 이어서 진짜 "새로 바꿀 텍스트"로 다시 교체(2차 최종 이미지)
	4) 결과로 두 장의 이미지를 확인할 수 있습니다.

	주요 포인트
	- "이미지 안에 들어갈 텍스트"는 UI에 노출되지 않으며(사용자 입력 불가), 오직 내부에서 자동 설정됩니다.
	- 1차 이미지는 완전히 임의의 알파벳 텍스트를 포함합니다.
	- 2차 이미지는 최종적으로 사용자가 입력한 "새로 바꿀 텍스트"를 포함합니다.
	"""
	    )

	    with gr.Row():
	        # Left column: prompt, target text, advanced settings, run button.
	        with gr.Column():
	            prompt_input = gr.Textbox(
	                label="Prompt (use `<text>` if you want)",
	                placeholder="e.g. A white cat with a speech bubble <text>",
	                lines=3,
	            )
	            final_text_input = gr.Textbox(
	                label="새로 바꿀 텍스트",
	                placeholder="예) 안녕하세요",
	                lines=1,
	            )
	            with gr.Accordion("고급 설정 (확장)", open=False):
	                height = gr.Slider(minimum=256, maximum=1152, step=64, value=512, label="Height")
	                width = gr.Slider(minimum=256, maximum=1152, step=64, value=512, label="Width")
	                steps = gr.Slider(minimum=6, maximum=25, step=1, value=8, label="Inference Steps")
	                scale = gr.Slider(minimum=0.0, maximum=10.0, step=0.5, value=3.5, label="Guidance Scale")
	                seed = gr.Number(value=1234, precision=0, label="Seed (reproducibility)")

	            run_btn = gr.Button("Generate Images", variant="primary")

	        # Right column: the two resulting images.
	        with gr.Column():
	            random_image_output = gr.Image(type="pil", label="1) Random Text Image")
	            final_image_output = gr.Image(type="pil", label="2) Final Text Image")

	    # Button action: run every stage above and show the two resulting images.
	    pipeline_inputs = [prompt_input, final_text_input, height, width, steps, scale, seed]
	    pipeline_outputs = [random_image_output, final_image_output]
	    run_btn.click(
	        fn=run_full_process,
	        inputs=pipeline_inputs,
	        outputs=pipeline_outputs,
	    )

	demo.launch(max_threads=20)