test-100 / app.py
Kims12's picture
Update app.py
ddcfe75 verified
raw
history blame
7.19 kB
import json
import os
import time
import uuid
import tempfile
from PIL import Image
import gradio as gr
import base64
import mimetypes
import logging
from google import genai
from google.genai import types
# Load environment variables stored in the .env file (requires python-dotenv: pip install python-dotenv)
from dotenv import load_dotenv
load_dotenv()
# Logging setup (log level: DEBUG); every record carries a timestamp and its level.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.DEBUG,
)
logger = logging.getLogger(__name__)
def save_binary_file(file_name, data):
    """Write ``data`` to ``file_name`` in binary mode, logging before and after.

    Args:
        file_name: Destination path; it is opened with mode ``"wb"``, so any
            existing content is replaced.
        data: Bytes-like payload handed to ``write``.
    """
    logger.debug(f"νŒŒμΌμ— 이진 데이터 μ €μž₯ 쀑: {file_name}")
    with open(file_name, "wb") as sink:
        sink.write(data)
    logger.debug(f"파일 μ €μž₯ μ™„λ£Œ: {file_name}")
def generate(text, file_name, model="gemini-2.0-flash-exp-image-generation"):
    """Upload an image, send it with a text prompt to Gemini, and save the image reply.

    The API key is read from the ``GEMINI_API_KEY`` environment variable.

    Args:
        text: Prompt text sent alongside the uploaded file.
        file_name: Path of the local file uploaded through ``client.files.upload``.
        model: Model name passed to ``generate_content_stream``.

    Returns:
        Path of a temporary ``.png`` file containing the last image payload
        received from the response stream, or ``None`` when an error occurred
        or the response carried no image data. Any exception (including the
        ``ValueError`` raised for a missing API key) is logged and converted
        to ``None``.
    """
    logger.debug(f"generate ν•¨μˆ˜ μ‹œμž‘ - ν…μŠ€νŠΈ: '{text}', 파일λͺ…: '{file_name}', λͺ¨λΈ: '{model}'")
    temp_path = None
    succeeded = False
    try:
        # The API key comes from the environment.
        effective_api_key = os.environ.get("GEMINI_API_KEY")
        if not effective_api_key:
            logger.error("API ν‚€κ°€ ν™˜κ²½λ³€μˆ˜μ— μ„€μ •λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€.")
            raise ValueError("API ν‚€κ°€ ν•„μš”ν•©λ‹ˆλ‹€.")
        logger.debug("ν™˜κ²½λ³€μˆ˜μ—μ„œ API ν‚€ 뢈러옴")

        client = genai.Client(api_key=effective_api_key)
        logger.debug("Gemini ν΄λΌμ΄μ–ΈνŠΈ μ΄ˆκΈ°ν™” μ™„λ£Œ.")

        # Upload the input file.
        files = [
            client.files.upload(file=file_name),
        ]
        logger.debug(f"파일 μ—…λ‘œλ“œ μ™„λ£Œ. URI: {files[0].uri}, MIME νƒ€μž…: {files[0].mime_type}")

        # Build the request content: the uploaded file's URI plus the text prompt.
        contents = [
            types.Content(
                role="user",
                parts=[
                    types.Part.from_uri(
                        file_uri=files[0].uri,
                        mime_type=files[0].mime_type,
                    ),
                    types.Part.from_text(text=text),
                ],
            ),
        ]
        logger.debug(f"컨텐츠 객체 생성 μ™„λ£Œ: {contents}")

        generate_content_config = types.GenerateContentConfig(
            temperature=1,
            top_p=0.95,
            top_k=40,
            max_output_tokens=8192,
            response_modalities=[
                "image",
                "text",
            ],
            response_mime_type="text/plain",
        )
        logger.debug(f"생성 μ„€μ •: {generate_content_config}")

        # Only reserve a unique path here; the handle is closed before
        # save_binary_file reopens the path for writing.
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
            temp_path = tmp.name
        logger.debug(f"μž„μ‹œ 파일 생성됨: {temp_path}")

        response_stream = client.models.generate_content_stream(
            model=model,
            contents=contents,
            config=generate_content_config,
        )
        logger.debug("응닡 슀트림 처리 μ‹œμž‘...")

        image_saved = False
        for chunk in response_stream:
            candidates = chunk.candidates
            if not candidates or not candidates[0].content or not candidates[0].content.parts:
                logger.warning("chunk에 후보, 컨텐츠, λ˜λŠ” νŒŒνŠΈκ°€ μ—†μŠ΅λ‹ˆλ‹€. κ±΄λ„ˆλœλ‹ˆλ‹€.")
                continue
            # Inspect every part: the image is not guaranteed to be the first one.
            for part in candidates[0].content.parts:
                inline_data = part.inline_data
                if inline_data and inline_data.data:
                    save_binary_file(temp_path, inline_data.data)
                    image_saved = True
                    logger.info(f"MIME νƒ€μž… {inline_data.mime_type}의 파일이 μ €μž₯됨: {temp_path} (ν”„λ‘¬ν”„νŠΈ: {text})")
                elif part.text:
                    logger.info(f"μˆ˜μ‹ λœ ν…μŠ€νŠΈ: {part.text}")
                    print(part.text)
            logger.debug(f"Raw chunk: {chunk}")

        # Drops only the local reference to the upload metadata.
        del files
        logger.debug("μ—…λ‘œλ“œλœ 파일 정보 μ‚­μ œ μ™„λ£Œ.")

        if not image_saved:
            # Do not hand back the path of an empty placeholder file.
            logger.error("No image data was found in the Gemini response.")
            return None

        succeeded = True
        return temp_path
    except Exception:
        logger.exception("이미지 생성 쀑 였λ₯˜ λ°œμƒ:")
        return None  # None signals failure to the caller.
    finally:
        # On any failure path, remove the temp file so it does not leak.
        if temp_path is not None and not succeeded:
            try:
                os.remove(temp_path)
            except OSError:
                logger.debug(f"Could not remove temporary file: {temp_path}")
def process_image_and_prompt(composite_pil, prompt):
    """Save the input image to disk, run ``generate`` on it, and return the result.

    Args:
        composite_pil: Input image object exposing ``save(path)`` (the Gradio
            image component in this file is configured with ``type="pil"``).
            ``None`` is treated as "no image supplied".
        prompt: Edit instruction forwarded to ``generate`` as the text prompt.

    Returns:
        A one-element list holding the generated image (converted to RGB when
        its mode is RGBA), or an empty list on any failure.
    """
    logger.debug(f"process_image_and_prompt ν•¨μˆ˜ μ‹œμž‘ - ν”„λ‘¬ν”„νŠΈ: '{prompt}'")
    if composite_pil is None:
        logger.error("No input image was provided.")
        return []

    composite_path = None
    gemma_edited_image_path = None
    try:
        # Reserve a path and close the handle before the image is written to it.
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
            composite_path = tmp.name
        composite_pil.save(composite_path)
        logger.debug(f"ν•©μ„± 이미지 μ €μž₯ μ™„λ£Œ: {composite_path}")

        gemma_edited_image_path = generate(
            text=prompt,
            file_name=composite_path,
            model="gemini-2.0-flash-exp-image-generation",
        )
        if not gemma_edited_image_path:
            logger.error("generate ν•¨μˆ˜μ—μ„œ None λ°˜ν™˜λ¨.")
            return []  # Empty list signals failure.

        logger.debug(f"이미지 생성 μ™„λ£Œ. 경둜: {gemma_edited_image_path}")
        # Image.open is lazy; convert()/copy() force the pixel data into memory
        # so the file can be closed and deleted afterwards.
        with Image.open(gemma_edited_image_path) as opened:
            if opened.mode == "RGBA":
                result_img = opened.convert("RGB")
            else:
                result_img = opened.copy()
        return [result_img]
    except Exception:
        logger.exception("process_image_and_prompt ν•¨μˆ˜μ—μ„œ 였λ₯˜ λ°œμƒ:")
        return []  # Empty list signals failure.
    finally:
        # Both temp files were created with delete=False; remove them here.
        for leftover in (composite_path, gemma_edited_image_path):
            if not leftover:
                continue
            try:
                os.remove(leftover)
            except OSError:
                logger.debug(f"Could not remove temporary file: {leftover}")
# --- Gradio interface layout ---
# Header markup shown at the top of the page (logo, title, API-key note).
_HEADER_HTML = """
<div style='display: flex; align-items: center; justify-content: center; gap: 20px'>
<div style="background-color: var(--block-background-fill); border-radius: 8px">
<img src="https://www.gstatic.com/lamda/images/gemini_favicon_f069958c85030456e93de685481c559f160ea06b.png" style="width: 100px; height: 100px;">
</div>
<div>
<h1>Geminiλ₯Ό μ΄μš©ν•œ 이미지 νŽΈμ§‘</h1>
<p>Gemini API ν‚€λŠ” ν™˜κ²½λ³€μˆ˜(GEMINI_API_KEY)둜 μ„€μ •λ˜μ–΄ μžˆμŠ΅λ‹ˆλ‹€.</p>
</div>
</div>
"""

with gr.Blocks() as demo:
    gr.HTML(_HEADER_HTML)
    gr.Markdown("이미지λ₯Ό μ—…λ‘œλ“œν•˜κ³ , νŽΈμ§‘ν•  λ‚΄μš©μ„ μž…λ ₯ν•˜μ„Έμš”.")

    with gr.Row():
        # Left column: inputs and the trigger button.
        with gr.Column():
            image_input = gr.Image(label="이미지 μ—…λ‘œλ“œ", type="pil", image_mode="RGBA")
            prompt_input = gr.Textbox(
                label="νŽΈμ§‘ ν”„λ‘¬ν”„νŠΈ",
                placeholder="νŽΈμ§‘ν•  λ‚΄μš©μ„ μž…λ ₯ν•˜μ„Έμš”...",
                lines=2,
            )
            submit_btn = gr.Button("이미지 νŽΈμ§‘ μ‹€ν–‰")
        # Right column: gallery that receives the list returned by the handler.
        with gr.Column():
            output_gallery = gr.Gallery(label="νŽΈμ§‘ κ²°κ³Ό")

    # Wire the button to the processing function.
    submit_btn.click(
        process_image_and_prompt,
        inputs=[image_input, prompt_input],
        outputs=output_gallery,
    )
# --- Self-test and launch ---
# Guarded so that merely importing this module neither calls the Gemini API
# nor starts a server; running the file as a script behaves as before.
if __name__ == "__main__":
    # Dummy image for the self-test (can be replaced with a real image).
    dummy_image = Image.new("RGBA", (100, 100), color="red")
    dummy_prompt = "이미지λ₯Ό νŒŒλž€μƒ‰μœΌλ‘œ λ³€κ²½ν•΄μ€˜"
    logger.info("process_image_and_prompt ν•¨μˆ˜λ₯Ό 직접 ν˜ΈμΆœν•©λ‹ˆλ‹€...")
    result = process_image_and_prompt(dummy_image, dummy_prompt)
    if result:
        logger.info(f"직접 호좜 성곡. κ²°κ³Ό: {result}")
    else:
        logger.error("직접 호좜 μ‹€νŒ¨.")

    demo.launch(share=True)