Every-Text

Runtime error

App Files Files Community

Every-Text / app.py

ginipick

Update app.py

b2f5030 verified 8 months ago

raw

history blame

9.62 kB

	import os
	import time
	from os import path
	import tempfile
	import uuid
	import base64
	import mimetypes
	import json
	import io

	import torch
	from PIL import Image

	from safetensors.torch import load_file
	from huggingface_hub import hf_hub_download

	# Diffusers 관련 라이브러리
	import gradio as gr
	from diffusers import FluxPipeline

	# Google GenAI 라이브러리
	from google import genai
	from google.genai import types

	#######################################
	# 0. Environment setup
	#######################################

	# Directory containing this script; falls back to the current working
	# directory when __file__ is not defined (e.g. an interactive session).
	BASE_DIR = os.getcwd() if "__file__" not in globals() else path.dirname(path.abspath(__file__))
	CACHE_PATH = path.join(BASE_DIR, "models")

	# Point the Hugging Face cache variables at the local "models" directory.
	# NOTE(review): these are assigned after huggingface_hub / diffusers were
	# imported at the top of the file; if those libraries read the variables at
	# import time the assignments have no effect on them -- confirm.
	os.environ.update(
	    {
	        "TRANSFORMERS_CACHE": CACHE_PATH,
	        "HF_HUB_CACHE": CACHE_PATH,
	        "HF_HOME": CACHE_PATH,
	    }
	)

	# Simple timer context manager
	class timer:
	    """Context manager that prints when a named step starts and how long it took.

	    Args:
	        method_name: Label used in the printed start/finish messages.
	    """

	    def __init__(self, method_name="timed process"):
	        self.method = method_name

	    def __enter__(self):
	        # perf_counter is monotonic, so the measured duration cannot be
	        # skewed by system clock adjustments the way time.time() can.
	        self.start = time.perf_counter()
	        print(f"{self.method} starts")
	        # Return the instance so `with timer(...) as t:` binds the timer
	        # instead of None.
	        return self

	    def __exit__(self, exc_type, exc_val, exc_tb):
	        elapsed = time.perf_counter() - self.start
	        print(f"{self.method} took {round(elapsed, 2)}s")
	        # Implicitly returns None: exceptions raised in the body propagate.

	#######################################
	# 1. Load the FLUX pipeline
	#######################################

	# exist_ok=True already tolerates an existing directory, so a separate
	# path.exists() check beforehand is redundant (and racy).
	os.makedirs(CACHE_PATH, exist_ok=True)

	pipe = FluxPipeline.from_pretrained(
	    "black-forest-labs/FLUX.1-dev",
	    torch_dtype=torch.bfloat16,
	)

	# Download the Hyper-SD LoRA weights and fuse them into the pipeline at a
	# scale of 0.125.
	lora_path = hf_hub_download("ByteDance/Hyper-SD", "Hyper-FLUX.1-dev-8steps-lora.safetensors")
	pipe.load_lora_weights(lora_path)
	pipe.fuse_lora(lora_scale=0.125)

	# Move the whole pipeline to the GPU in bfloat16; this requires a CUDA device.
	pipe.to(device="cuda", dtype=torch.bfloat16)

	#######################################
	# 2. Changing the text inside an image via Google GenAI
	#######################################

	def save_binary_file(file_name, data):
	    """Write the binary payload received from Google GenAI to ``file_name``.

	    The file is opened in binary write mode, so existing content is replaced.
	    """
	    with open(file_name, mode="wb") as sink:
	        sink.write(data)

	def generate_by_google_genai(text, file_name, model="gemini-2.0-flash-exp"):
	    """Send an image plus a text prompt to a Gemini model and collect the reply.

	    Args:
	        text: Prompt describing the text change or other instruction.
	        file_name: Path of the source image (e.g. a .png) to upload.
	        model: Name of the Gemini model to call.

	    Returns:
	        A ``(image_path, text_response)`` tuple. ``image_path`` is the path of
	        a temporary file (suffix ``.png``) holding the first image found in
	        the response stream, or ``None`` if the stream contained no image.
	        The file is not deleted automatically; the caller owns it.
	        ``text_response`` is the text streamed before the image (or all of the
	        text when no image arrived), with a newline after each chunk.

	    Raises:
	        ValueError: If the GAPI_TOKEN environment variable is not set.
	    """
	    # The API key is read from the GAPI_TOKEN environment variable (required).
	    api_key = os.getenv("GAPI_TOKEN")
	    if not api_key:
	        raise ValueError(
	            "GAPI_TOKEN 환경 변수가 설정되지 않았습니다. "
	            "Google GenAI API를 사용하기 위해서는 GAPI_TOKEN이 필요합니다."
	        )

	    client = genai.Client(api_key=api_key)

	    # Upload the source image so it can be referenced by URI in the request.
	    uploaded = client.files.upload(file=file_name)

	    # Build the user message: the uploaded image followed by the instruction.
	    contents = [
	        types.Content(
	            role="user",
	            parts=[
	                types.Part.from_uri(
	                    file_uri=uploaded.uri,
	                    mime_type=uploaded.mime_type,
	                ),
	                types.Part.from_text(text=text),
	            ],
	        ),
	    ]

	    generate_content_config = types.GenerateContentConfig(
	        temperature=1,
	        top_p=0.95,
	        top_k=40,
	        max_output_tokens=8192,
	        response_modalities=["image", "text"],
	        response_mime_type="text/plain",
	    )

	    text_chunks = []
	    image_path = None

	    for chunk in client.models.generate_content_stream(
	        model=model,
	        contents=contents,
	        config=generate_content_config,
	    ):
	        if not chunk.candidates or not chunk.candidates[0].content or not chunk.candidates[0].content.parts:
	            continue
	        parts = chunk.candidates[0].content.parts

	        # The image is not necessarily the first part of a chunk, so look at
	        # every part instead of only parts[0].
	        image_part = next(
	            (p for p in parts if p.inline_data and p.inline_data.data),
	            None,
	        )
	        if image_part is not None:
	            # Create the temp file only once there is data to store, so no
	            # empty file is left behind when the model answers with text
	            # only, and write through the handle we already hold rather
	            # than reopening the path while it is still open.
	            with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
	                tmp.write(image_part.inline_data.data)
	                image_path = tmp.name
	            print(f"File of mime type {image_part.inline_data.mime_type} saved to: {image_path}")
	            break

	        # chunk.text may be None for a chunk without text; skip it rather
	        # than failing on None + str.
	        if chunk.text:
	            text_chunks.append(chunk.text + "\n")

	    return image_path, "".join(text_chunks)

	#######################################
	# 3. Gradio callback functions
	#######################################

	def generate_initial_image(prompt, text, height, width, steps, scale, seed):
	    """Generate an image containing ``text`` with the FLUX pipeline.

	    If ``prompt`` contains the special ``<text>`` placeholder, every
	    occurrence is replaced by ``text``. Otherwise the phrase
	    "with clear readable text that says '...'" is appended to the prompt.

	    Args:
	        prompt: Scene description, optionally containing ``<text>``.
	        text: The wording that should appear in the image.
	        height: Output height; converted with ``int``.
	        width: Output width; converted with ``int``.
	        steps: Number of inference steps; converted with ``int``.
	        scale: Guidance scale; converted with ``float``.
	        seed: Seed for the random generator; converted with ``int``.

	    Returns:
	        The first image produced by the pipeline.
	    """
	    placeholder = "<text>"
	    combined_prompt = (
	        prompt.replace(placeholder, text)
	        if placeholder in prompt
	        else f"{prompt} with clear readable text that says '{text}'"
	    )

	    # Contexts are entered in the same order as a single combined `with`:
	    # inference mode, then bfloat16 autocast on CUDA, then the timer.
	    with torch.inference_mode():
	        with torch.autocast("cuda", dtype=torch.bfloat16):
	            with timer("inference"):
	                output = pipe(
	                    prompt=[combined_prompt],
	                    generator=torch.Generator().manual_seed(int(seed)),
	                    num_inference_steps=int(steps),
	                    guidance_scale=float(scale),
	                    height=int(height),
	                    width=int(width),
	                    max_sequence_length=256,
	                )
	                result = output.images[0]

	    return result

	def change_text_in_image(original_image, new_text):
	    """Ask the Gemini model to replace the text shown in an image.

	    Args:
	        original_image: Image object exposing ``save`` (the Gradio image
	            components in this file use ``type="pil"``), or ``None`` when
	            the image component is empty.
	        new_text: Wording that should replace the text in the image.

	    Returns:
	        ``(modified_image, "")`` when the model returned an image, otherwise
	        ``(None, text_response)`` with whatever text the model returned.

	    Raises:
	        gr.Error: If no image was supplied or any step of the process fails.
	    """
	    # Fail with a clear message instead of an AttributeError on None.save().
	    if original_image is None:
	        raise gr.Error("Error: no image to modify. Generate or upload an image first.")

	    original_path = None
	    image_path = None
	    try:
	        # Write the input image to a temporary PNG through the open handle.
	        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
	            original_path = tmp.name
	            original_image.save(tmp, format="PNG")

	        # Call the Gemini model.
	        image_path, text_response = generate_by_google_genai(
	            text=f"Change the text in this image to: '{new_text}'",
	            file_name=original_path,
	        )

	        if not image_path:
	            return None, text_response

	        # Older Gradio versions lack decode_base64_to_image, so PIL is used
	        # directly. The bytes are read into memory first, which keeps the
	        # returned image usable after the temp file is removed below.
	        with open(image_path, "rb") as f:
	            image_data = f.read()
	        modified_img = Image.open(io.BytesIO(image_data))
	        return modified_img, ""

	    except Exception as e:
	        raise gr.Error(f"Error: {e}") from e
	    finally:
	        # Best-effort cleanup so temp files do not accumulate per request.
	        for leftover in (original_path, image_path):
	            if leftover:
	                try:
	                    os.remove(leftover)
	                except OSError:
	                    # Already gone or not removable; nothing more to do.
	                    pass

	#######################################
	# 4. Gradio interface
	#######################################

	with gr.Blocks(title="Flux + Google GenAI Text Replacement") as demo:
	    # Usage instructions rendered at the top of the page.
	    gr.Markdown(
	        """
	# Flux Image Generation + Google GenAI Text Replacement

	Usage Instructions (in English)
	1. Write a prompt that may contain the special placeholder `<text>`.
	- Example: `A white cat says <text> in a cartoon style`.
	2. Enter the actual text in the "Text to Include in the Image" field.
	- Example: `안녕`
	3. Click the "Generate Base Image" button.
	- The prompt will be transformed so that `<text>` is replaced with your actual text.
	- If `<text>` is not found, the text will be appended automatically as `with clear readable text that says ...`.
	4. (Optional) If you want to change the text again, use the "Change Text in Image" button.

	---
	"""
	    )

	    with gr.Row():
	        # Left column: base image generation with FLUX.
	        with gr.Column():
	            gr.Markdown("## 1) Generate the Base Image (FLUX)")
	            prompt_input = gr.Textbox(
	                label="Prompt (with optional `<text>` placeholder)",
	                placeholder="e.g. A white cat says <text> in a cartoon style",
	                lines=3,
	            )
	            text_input = gr.Textbox(
	                label="Text to Include in the Image",
	                placeholder="e.g. 안녕",
	                lines=1,
	            )
	            with gr.Accordion("Advanced Settings", open=False):
	                height = gr.Slider(minimum=256, maximum=1152, step=64, value=512, label="Height")
	                width = gr.Slider(minimum=256, maximum=1152, step=64, value=512, label="Width")
	                steps = gr.Slider(minimum=6, maximum=25, step=1, value=8, label="Inference Steps")
	                scale = gr.Slider(minimum=0.0, maximum=5.0, step=0.1, value=3.5, label="Guidance Scale")
	                seed = gr.Number(value=1234, precision=0, label="Seed (reproducibility)")

	            generate_btn = gr.Button("Generate Base Image", variant="primary")
	            generated_image = gr.Image(type="pil", label="Generated Image")

	        # Right column: optional text replacement with Gemini.
	        with gr.Column():
	            gr.Markdown("## 2) (Optional) Change Text in the Generated Image (Gemini)")
	            new_text_input = gr.Textbox(placeholder="e.g. Hello", label="New Text to Insert")
	            modify_btn = gr.Button("Change Text in Image via Gemini", variant="secondary")
	            output_img = gr.Image(type="pil", label="Modified Image")
	            output_txt = gr.Textbox(label="(If only text returned)")

	    # Wire the buttons to their callbacks.
	    generate_btn.click(
	        fn=generate_initial_image,
	        inputs=[prompt_input, text_input, height, width, steps, scale, seed],
	        outputs=[generated_image],
	    )
	    modify_btn.click(
	        fn=change_text_in_image,
	        inputs=[generated_image, new_text_input],
	        outputs=[output_img, output_txt],
	    )

	demo.launch(max_threads=20)