Every-Text

Runtime error

App Files Files Community

Every-Text / app.py

ginipick

Update app.py

f8cac3e verified 8 months ago

raw

history blame

9.97 kB

	import os
	import time
	from os import path
	import tempfile
	import uuid
	import base64
	import mimetypes
	import json
	import io

	import torch
	from PIL import Image

	from safetensors.torch import load_file
	from huggingface_hub import hf_hub_download

	# Diffusers 관련 라이브러리
	import gradio as gr
	from diffusers import FluxPipeline

	# Google GenAI 라이브러리
	from google import genai
	from google.genai import types

	#######################################
	# 0. 환경설정
	#######################################

	# Directory containing this script; falls back to the current working
	# directory when __file__ is not defined (e.g. an interactive session).
	if "__file__" in globals():
	    BASE_DIR = path.dirname(path.abspath(__file__))
	else:
	    BASE_DIR = os.getcwd()

	# All model downloads are cached in a "models" folder next to the script.
	CACHE_PATH = path.join(BASE_DIR, "models")

	# Point every Hugging Face cache variable at that folder.
	# NOTE(review): these are assigned after huggingface_hub / diffusers were
	# imported at the top of the file; libraries that read these variables at
	# import time may not pick them up -- confirm the cache really lands here.
	os.environ.update(
	    dict.fromkeys(("TRANSFORMERS_CACHE", "HF_HUB_CACHE", "HF_HOME"), CACHE_PATH)
	)

	# 간단한 타이머 클래스
	class timer:
	    """Context manager that prints how long the wrapped block took.

	    On entry it prints ``"<name> starts"``; on exit it prints
	    ``"<name> took <seconds>s"`` with the elapsed time rounded to two
	    decimals. Exceptions raised inside the block are never suppressed.

	    Args:
	        method_name: Label used in both printed messages.
	    """

	    def __init__(self, method_name="timed process"):
	        self.method = method_name
	        self.start = None

	    def __enter__(self):
	        # perf_counter is monotonic, so the measured duration cannot be
	        # skewed by system clock adjustments.
	        self.start = time.perf_counter()
	        print(f"{self.method} starts")
	        # Return the instance so ``with timer(...) as t`` binds the timer.
	        return self

	    def __exit__(self, exc_type, exc_val, exc_tb):
	        elapsed = time.perf_counter() - self.start
	        print(f"{self.method} took {round(elapsed, 2)}s")
	        # Falsy return value: let any exception from the block propagate.
	        return False

	#######################################
	# 1. FLUX 파이프라인 로드
	#######################################

	# Create the local model cache directory if it is not there yet.
	if not path.exists(CACHE_PATH):
	    os.makedirs(CACHE_PATH, exist_ok=True)

	# Base FLUX.1-dev pipeline, loaded with bfloat16 weights.
	pipe = FluxPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16)

	# Download the Hyper-SD 8-step LoRA, load it and fuse it at scale 0.125.
	lora_path = hf_hub_download(
	    repo_id="ByteDance/Hyper-SD",
	    filename="Hyper-FLUX.1-dev-8steps-lora.safetensors",
	)
	pipe.load_lora_weights(lora_path)
	pipe.fuse_lora(lora_scale=0.125)

	# Move the fused pipeline onto the CUDA device.
	pipe.to(device="cuda", dtype=torch.bfloat16)

	#######################################
	# 2. Google GenAI를 통한 이미지 내 텍스트 변환 함수
	#######################################

	def save_binary_file(file_name, data):
	    """Write raw bytes to ``file_name``, replacing any existing content.

	    Used to persist the binary image payload returned by Google GenAI.

	    Args:
	        file_name: Destination path.
	        data: Bytes to write.
	    """
	    with open(file_name, mode="wb") as out_file:
	        out_file.write(data)

	def generate_by_google_genai(text, file_name, model="gemini-2.0-flash-exp"):
	    """Send an image plus a text instruction to a Gemini model and collect the reply.

	    The image at ``file_name`` is uploaded through the Google GenAI Files API
	    and the model response is streamed. The first image found in the stream
	    is written to a temporary ``.png`` file and streaming stops; until then
	    the text of each chunk is accumulated.

	    Args:
	        text: Prompt / instruction for the model (e.g. the replacement text).
	        file_name: Path of the source image to upload.
	        model: Name of the Gemini model to call.

	    Returns:
	        A ``(image_path, text_response)`` tuple. ``image_path`` is the path of
	        the saved image, or ``None`` when the model returned no image.
	        ``text_response`` contains the text chunks received before any image,
	        each followed by a newline.

	    Raises:
	        ValueError: If the ``GAPI_TOKEN`` environment variable is unset or empty.
	    """
	    # (1) The API key is mandatory and comes from the environment.
	    api_key = os.getenv("GAPI_TOKEN")
	    if not api_key:
	        raise ValueError(
	            "GAPI_TOKEN 환경 변수가 설정되지 않았습니다. "
	            "Google GenAI API를 사용하기 위해서는 GAPI_TOKEN이 필요합니다."
	        )

	    # (2) Create the client and (3) upload the source image.
	    client = genai.Client(api_key=api_key)
	    uploaded = client.files.upload(file=file_name)

	    # (4) One user turn: the uploaded image followed by the instruction.
	    contents = [
	        types.Content(
	            role="user",
	            parts=[
	                types.Part.from_uri(
	                    file_uri=uploaded.uri,
	                    mime_type=uploaded.mime_type,
	                ),
	                types.Part.from_text(text=text),
	            ],
	        ),
	    ]

	    # (5) Generation settings; both image and text output are requested.
	    generate_content_config = types.GenerateContentConfig(
	        temperature=1,
	        top_p=0.95,
	        top_k=40,
	        max_output_tokens=8192,
	        response_modalities=["image", "text"],
	        response_mime_type="text/plain",
	    )

	    # Reserve a temp file name and close the handle right away so the path
	    # can be reopened for writing on every platform.
	    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
	        temp_path = tmp.name

	    text_chunks = []
	    image_path = None
	    try:
	        for chunk in client.models.generate_content_stream(
	            model=model,
	            contents=contents,
	            config=generate_content_config,
	        ):
	            candidates = chunk.candidates
	            if not candidates or not candidates[0].content or not candidates[0].content.parts:
	                continue

	            # The image is not guaranteed to be the first part, so scan all of them.
	            image_part = next(
	                (part for part in candidates[0].content.parts if part.inline_data),
	                None,
	            )
	            if image_part is not None:
	                save_binary_file(temp_path, image_part.inline_data.data)
	                print(f"File of mime type {image_part.inline_data.mime_type} saved to: {temp_path}")
	                image_path = temp_path
	                break

	            # Text-only chunk; chunk.text may be None, so guard before appending.
	            if chunk.text:
	                text_chunks.append(chunk.text)
	    finally:
	        # Do not leave an empty temp file behind when no image was produced.
	        if image_path is None:
	            try:
	                os.remove(temp_path)
	            except OSError:
	                pass
	        # Best-effort removal of the uploaded file from the Files API;
	        # a cleanup failure must not mask the actual result or error.
	        try:
	            client.files.delete(name=uploaded.name)
	        except Exception as cleanup_error:
	            print(f"[WARN] Could not delete uploaded file: {cleanup_error}")

	    text_response = "".join(piece + "\n" for piece in text_chunks)
	    return image_path, text_response

	#######################################
	# 3. Gradio 함수
	# (1) FLUX로 이미지 생성 -> (2) Google GenAI로 텍스트 교체
	#######################################

	def generate_initial_image(prompt, text, height, width, steps, scale, seed):
	    """Generate an image containing ``text`` with the FLUX pipeline.

	    If ``prompt`` contains the ``<text>`` placeholder, every occurrence is
	    replaced by ``text``. Otherwise the phrase
	    "with clear readable text that says '<text>'" is appended to the prompt.

	    Args:
	        prompt: Scene description, optionally containing ``<text>``.
	        text: Text that should appear in the image.
	        height: Output height (converted with ``int``).
	        width: Output width (converted with ``int``).
	        steps: Number of inference steps (converted with ``int``).
	        scale: Guidance scale (converted with ``float``).
	        seed: Seed for the random generator (converted with ``int``).

	    Returns:
	        The first image produced by the pipeline.
	    """
	    placeholder = "<text>"
	    combined_prompt = (
	        prompt.replace(placeholder, text)
	        if placeholder in prompt
	        else f"{prompt} with clear readable text that says '{text}'"
	    )

	    # Debug aid: show the exact prompt handed to the pipeline.
	    print(f"[DEBUG] Final combined_prompt: {combined_prompt}")

	    with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16), timer("inference"):
	        pipeline_output = pipe(
	            prompt=[combined_prompt],
	            generator=torch.Generator().manual_seed(int(seed)),
	            num_inference_steps=int(steps),
	            guidance_scale=float(scale),
	            height=int(height),
	            width=int(width),
	            max_sequence_length=256,
	        )
	        result = pipeline_output.images[0]

	    return result

	def change_text_in_image(original_image, new_text):
	    """Ask Gemini to replace the text inside an image with ``new_text``.

	    The image is saved to a temporary PNG, sent to
	    ``generate_by_google_genai`` together with the instruction, and the
	    returned image (if any) is loaded fully into memory so that both
	    temporary files can be removed before returning.

	    Args:
	        original_image: PIL image whose text should be changed.
	        new_text: Replacement text.

	    Returns:
	        ``(modified_image, "")`` when Gemini returned an image, otherwise
	        ``(None, text_response)`` with the text Gemini returned instead.

	    Raises:
	        gr.Error: If no image was supplied or any step of the process fails.
	    """
	    # Guard outside the try block so this message is not re-wrapped below.
	    if original_image is None:
	        raise gr.Error("Error: no image to modify. Please generate a base image first.")

	    original_path = None
	    image_path = None
	    try:
	        # Reserve a temp path, close the handle, then let PIL write to it.
	        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
	            original_path = tmp.name
	        original_image.save(original_path)

	        # Call the Gemini model.
	        image_path, text_response = generate_by_google_genai(
	            text=f"Change the text in this image to: '{new_text}'",
	            file_name=original_path,
	        )

	        if not image_path:
	            # Only text came back, no image.
	            return None, text_response

	        # Older Gradio versions lack decode_base64_to_image, so use PIL.
	        # Reading the bytes first means the image no longer depends on the file.
	        with open(image_path, "rb") as f:
	            image_data = f.read()
	        modified_img = Image.open(io.BytesIO(image_data))
	        return modified_img, ""

	    except Exception as e:
	        # Surface the failure in the Gradio UI while keeping the original cause.
	        raise gr.Error(f"Error: {e}") from e
	    finally:
	        # Both temp files were created with delete=False; remove them here.
	        for leftover in (original_path, image_path):
	            if leftover:
	                try:
	                    os.remove(leftover)
	                except OSError:
	                    pass

	#######################################
	# 4. Gradio 인터페이스 구성
	#######################################

	with gr.Blocks(title="Flux + Google GenAI Text Replacement") as demo:
	    # Intro / usage text. The literal below is kept verbatim, including its
	    # unusual left alignment, because its content is rendered at runtime.
	    gr.Markdown(
	        """
	# Flux 기반 이미지 생성 + Google GenAI를 통한 텍스트 변환

	Usage:
	- You can include `<text>` in the prompt. For example:
	`white cat with speech bubble says <text>`
	- Then, type the actual text in "Text to Include in the Image" (ex: "Hello" or "안녕").
	- If `<text>` is not found in your prompt, the text will be automatically appended as:
	`with clear readable text that says '<text>'`.
	- Finally, you can optionally change the text again via Gemini.

	---
	"""
	    )

	    with gr.Row():
	        # Left column: step 1, generate the base image with FLUX.
	        with gr.Column():
	            gr.Markdown("## 1) Step 1: FLUX로 텍스트 포함 이미지 생성")
	            prompt_input = gr.Textbox(
	                lines=3,
	                label="이미지 장면/배경 Prompt (use `<text>` placeholder if you like)",
	                placeholder="e.g. A white cat with speech bubble says <text>",
	            )
	            text_input = gr.Textbox(
	                lines=1,
	                label="이미지 안에 들어갈 텍스트",
	                placeholder="e.g. Hello or 안녕",
	            )

	            # Advanced generation settings, collapsed by default.
	            with gr.Accordion("고급 설정 (확장)", open=False):
	                height = gr.Slider(label="Height", minimum=256, maximum=1152, step=64, value=512)
	                width = gr.Slider(label="Width", minimum=256, maximum=1152, step=64, value=512)
	                steps = gr.Slider(label="Inference Steps", minimum=6, maximum=25, step=1, value=8)
	                scale = gr.Slider(label="Guidance Scale", minimum=0.0, maximum=10.0, step=0.5, value=3.5)
	                seed = gr.Number(label="Seed (reproducibility)", value=1234, precision=0)

	            generate_btn = gr.Button("Generate Base Image", variant="primary")
	            generated_image = gr.Image(label="Generated Image (with text)", type="pil")

	        # Right column: step 2, change the text in the generated image.
	        with gr.Column():
	            gr.Markdown("## 2) Step 2: 생성된 이미지 내 텍스트 수정")
	            new_text_input = gr.Textbox(
	                label="새로 바꿀 텍스트",
	                placeholder="예) Hello world",
	            )
	            modify_btn = gr.Button("Change Text in Image via Gemini", variant="secondary")
	            output_img = gr.Image(label="Modified Image", type="pil")
	            output_txt = gr.Textbox(label="(If only text returned)")

	    # Wire the buttons to their handlers.
	    generate_btn.click(
	        fn=generate_initial_image,
	        inputs=[prompt_input, text_input, height, width, steps, scale, seed],
	        outputs=[generated_image],
	    )

	    modify_btn.click(
	        fn=change_text_in_image,
	        inputs=[generated_image, new_text_input],
	        outputs=[output_img, output_txt],
	    )

	# Start the Gradio server.
	demo.launch(max_threads=20)