Update app.py
Browse files
app.py
CHANGED
@@ -4,6 +4,7 @@ from PIL import Image
|
|
4 |
import gradio as gr
|
5 |
import logging
|
6 |
import re
|
|
|
7 |
from io import BytesIO
|
8 |
|
9 |
from google import genai
|
@@ -21,115 +22,111 @@ def save_binary_file(file_name, data):
|
|
21 |
with open(file_name, "wb") as f:
|
22 |
f.write(data)
|
23 |
|
24 |
-
def translate_prompt_to_english(prompt):
|
25 |
-
"""
|
26 |
-
์
๋ ฅ๋ ํ๋กฌํํธ์ ํ๊ธ์ด ํฌํจ๋์ด ์์ผ๋ฉด Geminiโ2.0โflash ๋ชจ๋ธ์ ์ฌ์ฉํ์ฌ ์์ด๋ก ๋ฒ์ญํฉ๋๋ค.
|
27 |
-
ํ๊ธ์ด ์์ผ๋ฉด ์๋ณธ ํ๋กฌํํธ๋ฅผ ๊ทธ๋๋ก ๋ฐํํฉ๋๋ค.
|
28 |
-
"""
|
29 |
-
if not re.search("[๊ฐ-ํฃ]", prompt):
|
30 |
-
return prompt
|
31 |
-
try:
|
32 |
-
api_key = os.environ.get("GEMINI_API_KEY")
|
33 |
-
if not api_key:
|
34 |
-
logger.error("Gemini API ํค๊ฐ ์ค์ ๋์ง ์์์ต๋๋ค.")
|
35 |
-
return prompt
|
36 |
-
client = genai.Client(api_key=api_key)
|
37 |
-
translation_prompt = f"Translate the following Korean text to English:\n\n{prompt}"
|
38 |
-
logger.info(f"Translation prompt: {translation_prompt}")
|
39 |
-
response = client.models.generate_content(
|
40 |
-
model="gemini-2.0-flash",
|
41 |
-
contents=[translation_prompt],
|
42 |
-
config=types.GenerateContentConfig(
|
43 |
-
response_modalities=['Text'],
|
44 |
-
temperature=0.2,
|
45 |
-
top_p=0.95,
|
46 |
-
top_k=40,
|
47 |
-
max_output_tokens=512
|
48 |
-
)
|
49 |
-
)
|
50 |
-
translated_text = ""
|
51 |
-
for part in response.candidates[0].content.parts:
|
52 |
-
if hasattr(part, 'text') and part.text:
|
53 |
-
translated_text += part.text
|
54 |
-
if translated_text.strip():
|
55 |
-
logger.info(f"Translated text: {translated_text.strip()}")
|
56 |
-
return translated_text.strip()
|
57 |
-
else:
|
58 |
-
logger.warning("๋ฒ์ญ ๊ฒฐ๊ณผ๊ฐ ์์ต๋๋ค. ์๋ณธ ํ๋กฌํํธ ์ฌ์ฉ")
|
59 |
-
return prompt
|
60 |
-
except Exception as e:
|
61 |
-
logger.exception("๋ฒ์ญ ์ค ์ค๋ฅ ๋ฐ์:")
|
62 |
-
return prompt
|
63 |
-
|
64 |
def preprocess_prompt(prompt, image1, image2, image3):
|
65 |
"""
|
66 |
ํ๋กฌํํธ๋ฅผ ์ฒ๋ฆฌํ๊ณ ๊ธฐ๋ฅ ๋ช
๋ น์ ํด์
|
67 |
"""
|
|
|
|
|
68 |
has_img1 = image1 is not None
|
69 |
has_img2 = image2 is not None
|
70 |
has_img3 = image3 is not None
|
71 |
-
|
|
|
72 |
if "#1" in prompt and not has_img1:
|
73 |
prompt = prompt.replace("#1", "์ฒซ ๋ฒ์งธ ์ด๋ฏธ์ง(์์)")
|
74 |
else:
|
75 |
prompt = prompt.replace("#1", "์ฒซ ๋ฒ์งธ ์ด๋ฏธ์ง")
|
76 |
-
|
77 |
if "#2" in prompt and not has_img2:
|
78 |
prompt = prompt.replace("#2", "๋ ๋ฒ์งธ ์ด๋ฏธ์ง(์์)")
|
79 |
else:
|
80 |
prompt = prompt.replace("#2", "๋ ๋ฒ์งธ ์ด๋ฏธ์ง")
|
81 |
-
|
82 |
if "#3" in prompt and not has_img3:
|
83 |
prompt = prompt.replace("#3", "์ธ ๋ฒ์งธ ์ด๋ฏธ์ง(์์)")
|
84 |
else:
|
85 |
prompt = prompt.replace("#3", "์ธ ๋ฒ์งธ ์ด๋ฏธ์ง")
|
86 |
-
|
|
|
87 |
if "1. ์ด๋ฏธ์ง ๋ณ๊ฒฝ" in prompt:
|
|
|
88 |
desc_match = re.search(r'#1์ "(.*?)"์ผ๋ก ๋ฐ๊ฟ๋ผ', prompt)
|
89 |
if desc_match:
|
90 |
description = desc_match.group(1)
|
91 |
prompt = f"์ฒซ ๋ฒ์งธ ์ด๋ฏธ์ง๋ฅผ {description}์ผ๋ก ๋ณ๊ฒฝํด์ฃผ์ธ์. ์๋ณธ ์ด๋ฏธ์ง์ ์ฃผ์ ๋ด์ฉ์ ์ ์งํ๋ ์๋ก์ด ์คํ์ผ๊ณผ ๋ถ์๊ธฐ๋ก ์ฌํด์ํด์ฃผ์ธ์."
|
92 |
else:
|
93 |
prompt = "์ฒซ ๋ฒ์งธ ์ด๋ฏธ์ง๋ฅผ ์ฐฝ์์ ์ผ๋ก ๋ณํํด์ฃผ์ธ์. ๋ ์์ํ๊ณ ์์ ์ ์ธ ๋ฒ์ ์ผ๋ก ๋ง๋ค์ด์ฃผ์ธ์."
|
94 |
-
|
95 |
elif "2. ๊ธ์์ง์ฐ๊ธฐ" in prompt:
|
|
|
96 |
text_match = re.search(r'#1์์ "(.*?)"๋ฅผ ์ง์๋ผ', prompt)
|
97 |
if text_match:
|
98 |
text_to_remove = text_match.group(1)
|
99 |
prompt = f"์ฒซ ๋ฒ์งธ ์ด๋ฏธ์ง์์ '{text_to_remove}' ํ
์คํธ๋ฅผ ์ฐพ์ ์์ฐ์ค๋ฝ๊ฒ ์ ๊ฑฐํด์ฃผ์ธ์. ํ
์คํธ๊ฐ ์๋ ๋ถ๋ถ์ ๋ฐฐ๊ฒฝ๊ณผ ์กฐํ๋กญ๊ฒ ์ฑ์์ฃผ์ธ์."
|
100 |
else:
|
101 |
prompt = "์ฒซ ๋ฒ์งธ ์ด๋ฏธ์ง์์ ๋ชจ๋ ํ
์คํธ๋ฅผ ์ฐพ์ ์์ฐ์ค๋ฝ๊ฒ ์ ๊ฑฐํด์ฃผ์ธ์. ๊น๋ํ ์ด๋ฏธ์ง๋ก ๋ง๋ค์ด์ฃผ์ธ์."
|
102 |
-
|
|
|
|
|
|
|
103 |
elif "4. ์ท๋ฐ๊พธ๊ธฐ" in prompt:
|
104 |
-
|
105 |
-
|
|
|
|
|
|
|
|
|
106 |
elif "5. ๋ฐฐ๊ฒฝ๋ฐ๊พธ๊ธฐ" in prompt:
|
107 |
-
prompt = "์ฒซ ๋ฒ์งธ ์ด๋ฏธ์ง์ ๋ฐฐ๊ฒฝ์ ๋ ๋ฒ์งธ ์ด๋ฏธ์ง์ ๋ฐฐ๊ฒฝ์ผ๋ก
|
108 |
-
|
109 |
elif "6. ์ด๋ฏธ์ง ํฉ์ฑ(์ํํฌํจ)" in prompt:
|
110 |
-
|
111 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
112 |
prompt += " ์ด๋ฏธ์ง๋ฅผ ์์ฑํด์ฃผ์ธ์."
|
|
|
113 |
return prompt
|
114 |
|
115 |
def generate_with_images(prompt, images):
|
116 |
"""
|
117 |
-
|
118 |
"""
|
119 |
try:
|
|
|
120 |
api_key = os.environ.get("GEMINI_API_KEY")
|
121 |
if not api_key:
|
122 |
return None, "API ํค๊ฐ ์ค์ ๋์ง ์์์ต๋๋ค. ํ๊ฒฝ๋ณ์๋ฅผ ํ์ธํด์ฃผ์ธ์."
|
123 |
-
|
|
|
124 |
client = genai.Client(api_key=api_key)
|
|
|
125 |
logger.info(f"Gemini API ์์ฒญ ์์ - ํ๋กฌํํธ: {prompt}")
|
126 |
-
|
127 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
128 |
for idx, img in enumerate(images, 1):
|
129 |
if img is not None:
|
130 |
contents.append(img)
|
131 |
logger.info(f"์ด๋ฏธ์ง #{idx} ์ถ๊ฐ๋จ")
|
132 |
-
|
|
|
133 |
response = client.models.generate_content(
|
134 |
model="gemini-2.0-flash-exp-image-generation",
|
135 |
contents=contents,
|
@@ -141,11 +138,15 @@ def generate_with_images(prompt, images):
|
|
141 |
max_output_tokens=8192
|
142 |
)
|
143 |
)
|
144 |
-
|
|
|
145 |
with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
|
146 |
temp_path = tmp.name
|
|
|
147 |
result_text = ""
|
148 |
image_found = False
|
|
|
|
|
149 |
for part in response.candidates[0].content.parts:
|
150 |
if hasattr(part, 'text') and part.text:
|
151 |
result_text += part.text
|
@@ -154,116 +155,140 @@ def generate_with_images(prompt, images):
|
|
154 |
save_binary_file(temp_path, part.inline_data.data)
|
155 |
image_found = True
|
156 |
logger.info("์๋ต์์ ์ด๋ฏธ์ง ์ถ์ถ ์ฑ๊ณต")
|
|
|
157 |
if not image_found:
|
158 |
return None, f"API์์ ์ด๋ฏธ์ง๋ฅผ ์์ฑํ์ง ๋ชปํ์ต๋๋ค. ์๋ต ํ
์คํธ: {result_text}"
|
|
|
|
|
159 |
result_img = Image.open(temp_path)
|
160 |
if result_img.mode == "RGBA":
|
161 |
result_img = result_img.convert("RGB")
|
|
|
162 |
return result_img, f"์ด๋ฏธ์ง๊ฐ ์ฑ๊ณต์ ์ผ๋ก ์์ฑ๋์์ต๋๋ค. {result_text}"
|
|
|
163 |
except Exception as e:
|
164 |
logger.exception("์ด๋ฏธ์ง ์์ฑ ์ค ์ค๋ฅ ๋ฐ์:")
|
165 |
return None, f"์ค๋ฅ ๋ฐ์: {str(e)}"
|
166 |
|
167 |
def process_images_with_prompt(image1, image2, image3, prompt):
|
168 |
"""
|
169 |
-
3๊ฐ์ ์ด๋ฏธ์ง์ ํ๋กฌํํธ๋ฅผ
|
170 |
-
API๋ฅผ ํธ์ถํ์ฌ ๊ฒฐ๊ณผ ์ด๋ฏธ์ง๋ฅผ ๋ฐํํฉ๋๋ค.
|
171 |
"""
|
172 |
try:
|
|
|
173 |
images = [image1, image2, image3]
|
174 |
valid_images = [img for img in images if img is not None]
|
|
|
175 |
if not valid_images:
|
176 |
-
return None, "์ ์ด๋ ํ๋์ ์ด๋ฏธ์ง๋ฅผ ์
๋ก๋ํด์ฃผ์ธ์."
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
final_prompt = translate_prompt_to_english(processed_prompt)
|
182 |
-
else:
|
183 |
-
final_prompt = processed_prompt
|
184 |
-
else:
|
185 |
if len(valid_images) == 1:
|
186 |
-
|
187 |
logger.info("Default prompt generated for single image")
|
188 |
elif len(valid_images) == 2:
|
189 |
-
|
190 |
logger.info("Default prompt generated for two images")
|
191 |
else:
|
192 |
-
|
193 |
logger.info("Default prompt generated for three images")
|
194 |
-
|
195 |
-
|
196 |
-
|
|
|
|
|
|
|
|
|
197 |
except Exception as e:
|
198 |
logger.exception("์ด๋ฏธ์ง ์ฒ๋ฆฌ ์ค ์ค๋ฅ ๋ฐ์:")
|
199 |
-
return None, f"์ค๋ฅ ๋ฐ์: {str(e)}"
|
200 |
|
201 |
-
|
202 |
-
try:
|
203 |
-
result_img, status, final_prompt = process_images_with_prompt(image1, image2, image3, prompt)
|
204 |
-
return result_img, status, final_prompt
|
205 |
-
except Exception as e:
|
206 |
-
logger.exception("์ฒ๋ฆฌ ์ค ์ค๋ฅ ๋ฐ์:")
|
207 |
-
return None, f"์ค๋ฅ ๋ฐ์: {str(e)}", prompt
|
208 |
|
|
|
209 |
with gr.Blocks() as demo:
|
210 |
gr.HTML(
|
211 |
"""
|
212 |
<div style="text-align: center; margin-bottom: 1rem;">
|
213 |
-
<h1
|
214 |
-
<p
|
215 |
</div>
|
216 |
"""
|
217 |
)
|
218 |
|
219 |
with gr.Row():
|
220 |
with gr.Column():
|
221 |
-
|
222 |
-
|
223 |
-
label="
|
224 |
-
image_mode="
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
lines=1,
|
229 |
-
placeholder="Enter Gemini API Key (optional)",
|
230 |
-
label="Gemini API Key (optional)"
|
231 |
-
)
|
232 |
prompt_input = gr.Textbox(
|
233 |
-
lines=
|
234 |
-
placeholder="
|
235 |
-
label="
|
236 |
)
|
237 |
-
|
|
|
|
|
|
|
238 |
with gr.Column():
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
prompt_display = gr.Textbox(label="
|
245 |
-
|
246 |
-
gr.Markdown("## Try these examples", elem_classes="gr-examples-header")
|
247 |
-
|
248 |
-
examples = [
|
249 |
-
["down/1_in-1.png", "#1 ์ด๋ฏธ์ง์ [์ฒญ์ ์์ด๋ ๊ณ ๋ฅผ ๊ฒ์ ๊ณ ๋๋ ๊ณ ]์ผ๋ก ๋ณ๊ฒฝํ๋ผ.", ""],
|
250 |
-
["down/2_in-1.png", "#1 ์ด๋ฏธ์ง์ [์ค๊ตญ์ด๋ฅผ ๋ชจ๋]๋ฅผ ์ ๊ฑฐํ๋ผ.", ""],
|
251 |
-
]
|
252 |
-
|
253 |
-
gr.Examples(
|
254 |
-
fn=lambda img, pr: process_and_show_prompt(img, None, None, pr),
|
255 |
-
examples=examples,
|
256 |
-
inputs=[image_input, prompt_input],
|
257 |
-
outputs=[output_gallery, output_text, prompt_display],
|
258 |
-
run_on_click=True,
|
259 |
-
elem_id="examples-grid"
|
260 |
-
)
|
261 |
|
262 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
263 |
submit_btn.click(
|
264 |
-
fn=
|
265 |
-
inputs=[
|
266 |
-
outputs=[
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
267 |
)
|
268 |
|
269 |
-
|
|
|
|
|
|
4 |
import gradio as gr
|
5 |
import logging
|
6 |
import re
|
7 |
+
import io
|
8 |
from io import BytesIO
|
9 |
|
10 |
from google import genai
|
|
|
22 |
with open(file_name, "wb") as f:
|
23 |
f.write(data)
|
24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
def preprocess_prompt(prompt, image1, image2, image3):
|
26 |
"""
|
27 |
ํ๋กฌํํธ๋ฅผ ์ฒ๋ฆฌํ๊ณ ๊ธฐ๋ฅ ๋ช
๋ น์ ํด์
|
28 |
"""
|
29 |
+
# ๊ธฐ์กด preprocess_prompt ํจ์ ์ฝ๋ ์ ์ง
|
30 |
+
# ์ด๋ฏธ์ง ์๋ ์ฐธ์กฐ ํ์ธ ๋ฐ ์ฒ๋ฆฌ
|
31 |
has_img1 = image1 is not None
|
32 |
has_img2 = image2 is not None
|
33 |
has_img3 = image3 is not None
|
34 |
+
|
35 |
+
# #1, #2, #3 ์ฐธ์กฐ๋ฅผ ์ค๋ช
์ผ๋ก ๋ณํ (์ด๋ฏธ์ง๊ฐ ์๋ ๊ฒฝ์ฐ ๋ฌด์)
|
36 |
if "#1" in prompt and not has_img1:
|
37 |
prompt = prompt.replace("#1", "์ฒซ ๋ฒ์งธ ์ด๋ฏธ์ง(์์)")
|
38 |
else:
|
39 |
prompt = prompt.replace("#1", "์ฒซ ๋ฒ์งธ ์ด๋ฏธ์ง")
|
40 |
+
|
41 |
if "#2" in prompt and not has_img2:
|
42 |
prompt = prompt.replace("#2", "๋ ๋ฒ์งธ ์ด๋ฏธ์ง(์์)")
|
43 |
else:
|
44 |
prompt = prompt.replace("#2", "๋ ๋ฒ์งธ ์ด๋ฏธ์ง")
|
45 |
+
|
46 |
if "#3" in prompt and not has_img3:
|
47 |
prompt = prompt.replace("#3", "์ธ ๋ฒ์งธ ์ด๋ฏธ์ง(์์)")
|
48 |
else:
|
49 |
prompt = prompt.replace("#3", "์ธ ๋ฒ์งธ ์ด๋ฏธ์ง")
|
50 |
+
|
51 |
+
# ๊ธฐ๋ฅ ๋ช
๋ น ํด์
|
52 |
if "1. ์ด๋ฏธ์ง ๋ณ๊ฒฝ" in prompt:
|
53 |
+
# ์ค๋ช
์ถ์ถ์ ์๋ํ์ง๋ง ์คํจํด๋ ๊ธฐ๋ณธ ํ๋กฌํํธ ์ ๊ณต
|
54 |
desc_match = re.search(r'#1์ "(.*?)"์ผ๋ก ๋ฐ๊ฟ๋ผ', prompt)
|
55 |
if desc_match:
|
56 |
description = desc_match.group(1)
|
57 |
prompt = f"์ฒซ ๋ฒ์งธ ์ด๋ฏธ์ง๋ฅผ {description}์ผ๋ก ๋ณ๊ฒฝํด์ฃผ์ธ์. ์๋ณธ ์ด๋ฏธ์ง์ ์ฃผ์ ๋ด์ฉ์ ์ ์งํ๋ ์๋ก์ด ์คํ์ผ๊ณผ ๋ถ์๊ธฐ๋ก ์ฌํด์ํด์ฃผ์ธ์."
|
58 |
else:
|
59 |
prompt = "์ฒซ ๋ฒ์งธ ์ด๋ฏธ์ง๋ฅผ ์ฐฝ์์ ์ผ๋ก ๋ณํํด์ฃผ์ธ์. ๋ ์์ํ๊ณ ์์ ์ ์ธ ๋ฒ์ ์ผ๋ก ๋ง๋ค์ด์ฃผ์ธ์."
|
60 |
+
|
61 |
elif "2. ๊ธ์์ง์ฐ๊ธฐ" in prompt:
|
62 |
+
# ์ง์ธ ํ
์คํธ ์ถ์ถ์ ์๋ํ์ง๋ง ์คํจํด๋ ๊ธฐ๋ณธ ํ๋กฌํํธ ์ ๊ณต
|
63 |
text_match = re.search(r'#1์์ "(.*?)"๋ฅผ ์ง์๋ผ', prompt)
|
64 |
if text_match:
|
65 |
text_to_remove = text_match.group(1)
|
66 |
prompt = f"์ฒซ ๋ฒ์งธ ์ด๋ฏธ์ง์์ '{text_to_remove}' ํ
์คํธ๋ฅผ ์ฐพ์ ์์ฐ์ค๋ฝ๊ฒ ์ ๊ฑฐํด์ฃผ์ธ์. ํ
์คํธ๊ฐ ์๋ ๋ถ๋ถ์ ๋ฐฐ๊ฒฝ๊ณผ ์กฐํ๋กญ๊ฒ ์ฑ์์ฃผ์ธ์."
|
67 |
else:
|
68 |
prompt = "์ฒซ ๋ฒ์งธ ์ด๋ฏธ์ง์์ ๋ชจ๋ ํ
์คํธ๋ฅผ ์ฐพ์ ์์ฐ์ค๋ฝ๊ฒ ์ ๊ฑฐํด์ฃผ์ธ์. ๊น๋ํ ์ด๋ฏธ์ง๋ก ๋ง๋ค์ด์ฃผ์ธ์."
|
69 |
+
|
70 |
+
elif "3. ์ผ๊ตด๋ฐ๊พธ๊ธฐ" in prompt:
|
71 |
+
prompt = "์ฒซ ๋ฒ์งธ ์ด๋ฏธ์ง์ ์ธ๋ฌผ ์ผ๊ตด์ ๋ ๋ฒ์งธ ์ด๋ฏธ์ง์ ์ผ๊ตด๋ก ์์ฐ์ค๋ฝ๊ฒ ๊ต์ฒดํด์ฃผ์ธ์. ์ผ๊ตด์ ํ์ ๊ณผ ํน์ง์ ๋ ๋ฒ์งธ ์ด๋ฏธ์ง๋ฅผ ๋ฐ๋ฅด๋, ๋๋จธ์ง ๋ถ๋ถ์ ์ฒซ ๋ฒ์งธ ์ด๋ฏธ์ง๋ฅผ ์ ์งํด์ฃผ์ธ์."
|
72 |
+
|
73 |
elif "4. ์ท๋ฐ๊พธ๊ธฐ" in prompt:
|
74 |
+
# ์ฌ๋ฌ ์ด๋ฏธ์ง ์ฐธ์กฐ ์ฒ๋ฆฌ
|
75 |
+
if "#3" in prompt or "๋๋ #3" in prompt:
|
76 |
+
prompt = "์ฒซ ๋ฒ์งธ ์ด๋ฏธ์ง์ ์ธ๋ฌผ ์์์ ๋ ๋ฒ์งธ ๋๋ ์ธ ๋ฒ์งธ ์ด๏ฟฝ๏ฟฝ๏ฟฝ์ง์ ์์์ผ๋ก ์์ฐ์ค๋ฝ๊ฒ ๊ต์ฒดํด์ฃผ์ธ์. ์์์ ์คํ์ผ๊ณผ ์์์ ์ฐธ์กฐ ์ด๋ฏธ์ง๋ฅผ ๋ฐ๋ฅด๋, ์ ์ฒด ๋น์จ๊ณผ ํฌ์ฆ๋ ์ฒซ ๋ฒ์งธ ์ด๋ฏธ์ง๋ฅผ ์ ์งํด์ฃผ์ธ์."
|
77 |
+
else:
|
78 |
+
prompt = "์ฒซ ๋ฒ์งธ ์ด๋ฏธ์ง์ ์ธ๋ฌผ ์์์ ๋ ๋ฒ์งธ ์ด๋ฏธ์ง์ ์์์ผ๋ก ์์ฐ์ค๋ฝ๊ฒ ๊ต์ฒดํด์ฃผ์ธ์. ์์์ ์คํ์ผ๊ณผ ์์์ ๋ ๋ฒ์งธ ์ด๋ฏธ์ง๋ฅผ ๋ฐ๋ฅด๋, ์ ์ฒด ๋น์จ๊ณผ ํฌ์ฆ๋ ์ฒซ ๋ฒ์งธ ์ด๋ฏธ์ง๋ฅผ ์ ์งํด์ฃผ์ธ์."
|
79 |
+
|
80 |
elif "5. ๋ฐฐ๊ฒฝ๋ฐ๊พธ๊ธฐ" in prompt:
|
81 |
+
prompt = "์ฒซ ๋ฒ์งธ ์ด๋ฏธ์ง์ ๋ฐฐ๊ฒฝ์ ๋ ๋ฒ์งธ ์ด๋ฏธ์ง์ ๋ฐฐ๊ฒฝ์ผ๋ก ์์ฐ์ค๋ฝ๊ฒ ๊ต์ฒดํด์ฃผ์ธ์. ์ฒซ ๋ฒ์งธ ์ด๋ฏธ์ง์ ์ฃผ์ ํผ์ฌ์ฒด๋ ์ ์งํ๊ณ , ๋ ๋ฒ์งธ ์ด๋ฏธ์ง์ ๋ฐฐ๊ฒฝ๊ณผ ์กฐํ๋กญ๊ฒ ํฉ์ฑํด์ฃผ์ธ์."
|
82 |
+
|
83 |
elif "6. ์ด๋ฏธ์ง ํฉ์ฑ(์ํํฌํจ)" in prompt:
|
84 |
+
# ์ฌ๋ฌ ์ด๋ฏธ์ง ์ฐธ์กฐ ์ฒ๋ฆฌ
|
85 |
+
if "#3" in prompt or "๋๋ #3" in prompt:
|
86 |
+
prompt = "์ฒซ ๋ฒ์งธ ์ด๋ฏธ์ง์ ๋ ๋ฒ์งธ, ์ธ ๋ฒ์งธ ์ด๋ฏธ์ง๋ฅผ ์์ฐ์ค๋ฝ๊ฒ ํฉ์ฑํด์ฃผ์ธ์. ๋ชจ๋ ์ด๋ฏธ์ง์ ์ฃผ์ ์์๋ฅผ ํฌํจํ๊ณ , ํนํ ์ํ์ด ์ ๋ณด์ด๋๋ก ์กฐํ๋กญ๊ฒ ํตํฉํด์ฃผ์ธ์."
|
87 |
+
else:
|
88 |
+
prompt = "์ฒซ ๋ฒ์งธ ์ด๋ฏธ์ง์ ๋ ๋ฒ์งธ ์ด๋ฏธ์ง๋ฅผ ์์ฐ์ค๋ฝ๊ฒ ํฉ์ฑํด์ฃผ์ธ์. ๋ ์ด๋ฏธ์ง์ ์ฃผ์ ์์๋ฅผ ํฌํจํ๊ณ , ํนํ ์ํ์ด ์ ๋ณด์ด๋๋ก ์กฐํ๋กญ๊ฒ ํตํฉํด์ฃผ์ธ์."
|
89 |
+
|
90 |
+
elif "7. ์ด๋ฏธ์ง ํฉ์ฑ(์คํ์ผ์ ์ฉ)" in prompt:
|
91 |
+
prompt = "์ฒซ ๋ฒ์งธ ์ด๋ฏธ์ง์ ๋ด์ฉ์ ๋ ๋ฒ์งธ ์ด๋ฏธ์ง์ ์คํ์ผ๋ก ๋ณํํด์ฃผ์ธ์. ์ฒซ ๋ฒ์งธ ์ด๋ฏธ์ง์ ์ฃผ์ ํผ์ฌ์ฒด์ ๊ตฌ๋๋ ์ ์งํ๋, ๋ ๋ฒ์งธ ์ด๋ฏธ์ง์ ์์ ์ ์คํ์ผ, ์์, ์ง๊ฐ์ ์ ์ฉํด์ฃผ์ธ์."
|
92 |
+
|
93 |
+
# ๊ฐ๋จํ ์์ ๋ณ๊ฒฝ ์์ฒญ ์ฒ๋ฆฌ
|
94 |
+
elif "์ ๋ถ์์์ผ๋ก ๋ฐ๊ฟ๋ผ" in prompt or "๋ฅผ ๋ถ์์์ผ๋ก ๋ฐ๊ฟ๋ผ" in prompt:
|
95 |
+
prompt = "์ฒซ ๋ฒ์งธ ์ด๋ฏธ์ง๋ฅผ ๋ถ์์ ํค์ผ๋ก ๋ณ๊ฒฝํด์ฃผ์ธ์. ์ ์ฒด์ ์ธ ์์์ ๋ถ์ ๊ณ์ด๋ก ์กฐ์ ํ๊ณ ์์ฐ์ค๋ฌ์ด ๋๋์ ์ ์งํด์ฃผ์ธ์."
|
96 |
+
|
97 |
+
# ๋ช
ํํ ์ด๋ฏธ์ง ์์ฑ ์์ฒญ ์ถ๊ฐ
|
98 |
prompt += " ์ด๋ฏธ์ง๋ฅผ ์์ฑํด์ฃผ์ธ์."
|
99 |
+
|
100 |
return prompt
|
101 |
|
102 |
def generate_with_images(prompt, images):
|
103 |
"""
|
104 |
+
๊ณต์ ๋ฌธ์์ ๊ธฐ๋ฐํ ์ฌ๋ฐ๋ฅธ API ํธ์ถ ๋ฐฉ์ ๊ตฌํ
|
105 |
"""
|
106 |
try:
|
107 |
+
# API ํค ํ์ธ
|
108 |
api_key = os.environ.get("GEMINI_API_KEY")
|
109 |
if not api_key:
|
110 |
return None, "API ํค๊ฐ ์ค์ ๋์ง ์์์ต๋๋ค. ํ๊ฒฝ๋ณ์๋ฅผ ํ์ธํด์ฃผ์ธ์."
|
111 |
+
|
112 |
+
# Gemini ํด๋ผ์ด์ธํธ ์ด๊ธฐํ
|
113 |
client = genai.Client(api_key=api_key)
|
114 |
+
|
115 |
logger.info(f"Gemini API ์์ฒญ ์์ - ํ๋กฌํํธ: {prompt}")
|
116 |
+
|
117 |
+
# ์ปจํ
์ธ ์ค๋น
|
118 |
+
contents = []
|
119 |
+
|
120 |
+
# ํ
์คํธ ํ๋กฌํํธ ์ถ๊ฐ
|
121 |
+
contents.append(prompt)
|
122 |
+
|
123 |
+
# ์ด๋ฏธ์ง ์ถ๊ฐ
|
124 |
for idx, img in enumerate(images, 1):
|
125 |
if img is not None:
|
126 |
contents.append(img)
|
127 |
logger.info(f"์ด๋ฏธ์ง #{idx} ์ถ๊ฐ๋จ")
|
128 |
+
|
129 |
+
# ์์ฑ ์ค์ - ๊ณต์ ๋ฌธ์์ ๋ฐ๋ผ responseModalities ์ค์
|
130 |
response = client.models.generate_content(
|
131 |
model="gemini-2.0-flash-exp-image-generation",
|
132 |
contents=contents,
|
|
|
138 |
max_output_tokens=8192
|
139 |
)
|
140 |
)
|
141 |
+
|
142 |
+
# ์์ ํ์ผ ์์ฑ
|
143 |
with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
|
144 |
temp_path = tmp.name
|
145 |
+
|
146 |
result_text = ""
|
147 |
image_found = False
|
148 |
+
|
149 |
+
# ์๋ต ์ฒ๋ฆฌ
|
150 |
for part in response.candidates[0].content.parts:
|
151 |
if hasattr(part, 'text') and part.text:
|
152 |
result_text += part.text
|
|
|
155 |
save_binary_file(temp_path, part.inline_data.data)
|
156 |
image_found = True
|
157 |
logger.info("์๋ต์์ ์ด๋ฏธ์ง ์ถ์ถ ์ฑ๊ณต")
|
158 |
+
|
159 |
if not image_found:
|
160 |
return None, f"API์์ ์ด๋ฏธ์ง๋ฅผ ์์ฑํ์ง ๋ชปํ์ต๋๋ค. ์๋ต ํ
์คํธ: {result_text}"
|
161 |
+
|
162 |
+
# ๊ฒฐ๊ณผ ์ด๋ฏธ์ง ๋ฐํ
|
163 |
result_img = Image.open(temp_path)
|
164 |
if result_img.mode == "RGBA":
|
165 |
result_img = result_img.convert("RGB")
|
166 |
+
|
167 |
return result_img, f"์ด๋ฏธ์ง๊ฐ ์ฑ๊ณต์ ์ผ๋ก ์์ฑ๋์์ต๋๋ค. {result_text}"
|
168 |
+
|
169 |
except Exception as e:
|
170 |
logger.exception("์ด๋ฏธ์ง ์์ฑ ์ค ์ค๋ฅ ๋ฐ์:")
|
171 |
return None, f"์ค๋ฅ ๋ฐ์: {str(e)}"
|
172 |
|
173 |
def process_images_with_prompt(image1, image2, image3, prompt):
|
174 |
"""
|
175 |
+
3๊ฐ์ ์ด๋ฏธ์ง์ ํ๋กฌํํธ๋ฅผ ์ฒ๋ฆฌํ๋ ํจ์
|
|
|
176 |
"""
|
177 |
try:
|
178 |
+
# ์ด๋ฏธ์ง ๊ฐ์ ํ์ธ
|
179 |
images = [image1, image2, image3]
|
180 |
valid_images = [img for img in images if img is not None]
|
181 |
+
|
182 |
if not valid_images:
|
183 |
+
return None, "์ ์ด๋ ํ๋์ ์ด๋ฏธ์ง๋ฅผ ์
๋ก๋ํด์ฃผ์ธ์."
|
184 |
+
|
185 |
+
# ํ๋กฌํํธ ์ฒ๋ฆฌ
|
186 |
+
if not prompt or not prompt.strip():
|
187 |
+
# ํ๋กฌํํธ๊ฐ ์์ผ๋ฉด ์
๋ก๋๋ ์ด๋ฏธ์ง ์์ ๋ฐ๋ผ ์๋ ํฉ์ฑ ํ๋กฌํํธ๋ฅผ ์์ด๋ก ์์ฑ
|
|
|
|
|
|
|
|
|
188 |
if len(valid_images) == 1:
|
189 |
+
prompt = "Please creatively transform this image into a more vivid and artistic version."
|
190 |
logger.info("Default prompt generated for single image")
|
191 |
elif len(valid_images) == 2:
|
192 |
+
prompt = "Please seamlessly composite these two images, integrating their key elements harmoniously into a single image."
|
193 |
logger.info("Default prompt generated for two images")
|
194 |
else:
|
195 |
+
prompt = "Please creatively composite these three images, combining their main elements into a cohesive and natural scene."
|
196 |
logger.info("Default prompt generated for three images")
|
197 |
+
else:
|
198 |
+
# ํ๋กฌํํธ ์ ์ฒ๋ฆฌ ๋ฐ ๊ธฐ๋ฅ ๋ช
๋ น ํด์
|
199 |
+
prompt = preprocess_prompt(prompt, image1, image2, image3)
|
200 |
+
|
201 |
+
# ์๋ก์ด API ํธ์ถ ๋ฐฉ์ ์ฌ์ฉ
|
202 |
+
return generate_with_images(prompt, valid_images)
|
203 |
+
|
204 |
except Exception as e:
|
205 |
logger.exception("์ด๋ฏธ์ง ์ฒ๋ฆฌ ์ค ์ค๋ฅ ๋ฐ์:")
|
206 |
+
return None, f"์ค๋ฅ ๋ฐ์: {str(e)}"
|
207 |
|
208 |
+
# (๊ธฐ๋ฅ ์ ํ ๊ด๋ จ ์ฝ๋ ์ ์ฒด ์ญ์ ๋จ)
|
|
|
|
|
|
|
|
|
|
|
|
|
209 |
|
210 |
+
# Gradio ์ธํฐํ์ด์ค
|
211 |
with gr.Blocks() as demo:
|
212 |
gr.HTML(
|
213 |
"""
|
214 |
<div style="text-align: center; margin-bottom: 1rem;">
|
215 |
+
<h1>๊ฐ๋จํ ์ด๋ฏธ์ง ์์ฑ๊ธฐ</h1>
|
216 |
+
<p>์ด๋ฏธ์ง๋ฅผ ์
๋ก๋ํ๊ณ ๋ฐ๋ก ์คํํ๋ฉด ์๋์ผ๋ก ํฉ์ฑํฉ๋๋ค.</p>
|
217 |
</div>
|
218 |
"""
|
219 |
)
|
220 |
|
221 |
with gr.Row():
|
222 |
with gr.Column():
|
223 |
+
# 3๊ฐ์ ์ด๋ฏธ์ง ์
๋ ฅ
|
224 |
+
with gr.Row():
|
225 |
+
image1_input = gr.Image(type="pil", label="#1", image_mode="RGB")
|
226 |
+
image2_input = gr.Image(type="pil", label="#2", image_mode="RGB")
|
227 |
+
image3_input = gr.Image(type="pil", label="#3", image_mode="RGB")
|
228 |
+
|
229 |
+
# ํ๋กฌํํธ ์
๋ ฅ (์ ํ ์ฌํญ)
|
|
|
|
|
|
|
|
|
230 |
prompt_input = gr.Textbox(
|
231 |
+
lines=3,
|
232 |
+
placeholder="ํ๋กฌํํธ๋ฅผ ์
๋ ฅํ๊ฑฐ๋ ๋น์๋๋ฉด ์๋ ํฉ์ฑ๋ฉ๋๋ค.",
|
233 |
+
label="ํ๋กฌํํธ (์ ํ ์ฌํญ)"
|
234 |
)
|
235 |
+
|
236 |
+
# ์์ฑ ๋ฒํผ
|
237 |
+
submit_btn = gr.Button("์ด๋ฏธ์ง ์์ฑ", variant="primary")
|
238 |
+
|
239 |
with gr.Column():
|
240 |
+
# ๊ฒฐ๊ณผ ์ถ๋ ฅ
|
241 |
+
output_image = gr.Image(label="์์ฑ๋ ์ด๋ฏธ์ง")
|
242 |
+
output_text = gr.Textbox(label="์ํ ๋ฉ์์ง")
|
243 |
+
|
244 |
+
# ์ฌ์ฉ๋ ํ๋กฌํํธ ํ์
|
245 |
+
prompt_display = gr.Textbox(label="์ฌ์ฉ๋ ํ๋กฌํํธ", visible=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
246 |
|
247 |
+
# ์ด๋ฏธ์ง ์์ฑ ๋ฒํผ ํด๋ฆญ ์ด๋ฒคํธ
|
248 |
+
def process_and_show_prompt(image1, image2, image3, prompt):
|
249 |
+
# ์ด๋ฏธ์ง ๊ฐ์ ํ์ธ
|
250 |
+
images = [image1, image2, image3]
|
251 |
+
valid_images = [img for img in images if img is not None]
|
252 |
+
|
253 |
+
try:
|
254 |
+
# ์๋ ํ๋กฌํํธ ์์ฑ ๋๋ ํ๋กฌํํธ ์ ์ฒ๋ฆฌ
|
255 |
+
auto_prompt = prompt
|
256 |
+
if not prompt or not prompt.strip():
|
257 |
+
if len(valid_images) == 1:
|
258 |
+
auto_prompt = "Please creatively transform this image into a more vivid and artistic version."
|
259 |
+
elif len(valid_images) == 2:
|
260 |
+
auto_prompt = "Please seamlessly composite these two images, integrating their key elements harmoniously into a single image."
|
261 |
+
else:
|
262 |
+
auto_prompt = "Please creatively composite these three images, combining their main elements into a cohesive and natural scene."
|
263 |
+
else:
|
264 |
+
auto_prompt = preprocess_prompt(prompt, image1, image2, image3)
|
265 |
+
|
266 |
+
# ์ด๋ฏธ์ง ์์ฑ ํจ์ ํธ์ถ
|
267 |
+
result_img, status = process_images_with_prompt(image1, image2, image3, prompt)
|
268 |
+
|
269 |
+
return result_img, status, auto_prompt
|
270 |
+
except Exception as e:
|
271 |
+
logger.exception("์ฒ๋ฆฌ ์ค ์ค๋ฅ ๋ฐ์:")
|
272 |
+
return None, f"์ค๋ฅ ๋ฐ์: {str(e)}", prompt
|
273 |
+
|
274 |
submit_btn.click(
|
275 |
+
fn=process_and_show_prompt,
|
276 |
+
inputs=[image1_input, image2_input, image3_input, prompt_input],
|
277 |
+
outputs=[output_image, output_text, prompt_display],
|
278 |
+
)
|
279 |
+
|
280 |
+
gr.Markdown(
|
281 |
+
"""
|
282 |
+
### ์ฌ์ฉ ๋ฐฉ๋ฒ:
|
283 |
+
|
284 |
+
1. **์๋ ํฉ์ฑ**: ์ด๋ฏธ์ง๋ง ์
๋ก๋ํ๊ณ ํ๋กฌํํธ๋ฅผ ๋น์๋๋ฉด ์๋์ผ๋ก ํฉ์ฑ๋ฉ๋๋ค.
|
285 |
+
2. **์ด๋ฏธ์ง ์ฐธ์กฐ**: #1, #2, #3์ผ๋ก ๊ฐ ์ด๋ฏธ์ง๋ฅผ ์ฐธ์กฐํ ์ ์์ต๋๋ค.
|
286 |
+
3. **์ผ๋ถ ์ด๋ฏธ์ง๋ง**: ํ์ํ ์ด๋ฏธ์ง๋ง ์
๋ก๋ํด๋ ๊ธฐ๋ฅ ์คํ์ด ๊ฐ๋ฅํฉ๋๋ค.
|
287 |
+
|
288 |
+
> **ํ**: ํ๋กฌํํธ๋ฅผ ์ง์ ์์ ํ ์๋ ์์ต๋๋ค.
|
289 |
+
"""
|
290 |
)
|
291 |
|
292 |
+
# ์ ํ๋ฆฌ์ผ์ด์
์คํ
|
293 |
+
if __name__ == "__main__":
|
294 |
+
demo.launch(share=True)
|