Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -78,7 +78,7 @@ def generate_by_google_genai(text, file_name, model="gemini-2.0-flash-exp"):
|
|
78 |
- file_name: 원본 이미지(예: .png) 경로
|
79 |
- model: 사용할 gemini 모델 이름
|
80 |
"""
|
81 |
-
#
|
82 |
api_key = os.getenv("GAPI_TOKEN", None)
|
83 |
if not api_key:
|
84 |
raise ValueError(
|
@@ -86,12 +86,13 @@ def generate_by_google_genai(text, file_name, model="gemini-2.0-flash-exp"):
|
|
86 |
"Google GenAI API๋ฅผ ์ฌ์ฉํ๊ธฐ ์ํด์๋ GAPI_TOKEN์ด ํ์ํฉ๋๋ค."
|
87 |
)
|
88 |
|
|
|
89 |
client = genai.Client(api_key=api_key)
|
90 |
|
91 |
-
# 이미지 업로드
|
92 |
files = [client.files.upload(file=file_name)]
|
93 |
|
94 |
-
# gemini์ ์ ๋ฌํ Content ์ค๋น
|
95 |
contents = [
|
96 |
types.Content(
|
97 |
role="user",
|
@@ -105,6 +106,7 @@ def generate_by_google_genai(text, file_name, model="gemini-2.0-flash-exp"):
|
|
105 |
),
|
106 |
]
|
107 |
|
|
|
108 |
generate_content_config = types.GenerateContentConfig(
|
109 |
temperature=1,
|
110 |
top_p=0.95,
|
@@ -117,9 +119,10 @@ def generate_by_google_genai(text, file_name, model="gemini-2.0-flash-exp"):
|
|
117 |
text_response = ""
|
118 |
image_path = None
|
119 |
|
120 |
-
# ์์
|
121 |
with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
|
122 |
temp_path = tmp.name
|
|
|
123 |
for chunk in client.models.generate_content_stream(
|
124 |
model=model,
|
125 |
contents=contents,
|
@@ -129,32 +132,39 @@ def generate_by_google_genai(text, file_name, model="gemini-2.0-flash-exp"):
|
|
129 |
continue
|
130 |
candidate = chunk.candidates[0].content.parts[0]
|
131 |
|
132 |
-
# inline_data
|
133 |
if candidate.inline_data:
|
134 |
save_binary_file(temp_path, candidate.inline_data.data)
|
135 |
print(f"File of mime type {candidate.inline_data.mime_type} saved to: {temp_path}")
|
136 |
image_path = temp_path
|
137 |
break
|
138 |
else:
|
|
|
139 |
text_response += chunk.text + "\n"
|
140 |
|
|
|
141 |
del files
|
|
|
142 |
return image_path, text_response
|
143 |
|
144 |
#######################################
|
145 |
-
# 3. Gradio 함수
|
|
|
146 |
#######################################
|
147 |
|
148 |
def generate_initial_image(prompt, text, height, width, steps, scale, seed):
|
149 |
"""
|
150 |
-
FLUX
|
151 |
-
- prompt
|
152 |
-
-
|
153 |
"""
|
154 |
if "<text>" in prompt:
|
155 |
combined_prompt = prompt.replace("<text>", text)
|
156 |
else:
|
157 |
combined_prompt = f"{prompt} with clear readable text that says '{text}'"
|
|
|
|
|
|
|
158 |
|
159 |
with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16), timer("inference"):
|
160 |
result = pipe(
|
@@ -171,11 +181,10 @@ def generate_initial_image(prompt, text, height, width, steps, scale, seed):
|
|
171 |
|
172 |
def change_text_in_image(original_image, new_text):
|
173 |
"""
|
174 |
-
|
175 |
업로드된 이미지 내부의 문구를 `new_text`로 변경해주는 함수.
|
176 |
"""
|
177 |
try:
|
178 |
-
# 임시 파일에 먼저 저장
|
179 |
with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
|
180 |
original_path = tmp.name
|
181 |
original_image.save(original_path)
|
@@ -187,35 +196,34 @@ def change_text_in_image(original_image, new_text):
|
|
187 |
)
|
188 |
|
189 |
if image_path:
|
190 |
-
# Gradio ๊ตฌ๋ฒ์ ์๋ decode_base64_to_image๊ฐ ์์ผ๋ฏ๋ก PIL
|
191 |
with open(image_path, "rb") as f:
|
192 |
image_data = f.read()
|
193 |
modified_img = Image.open(io.BytesIO(image_data))
|
194 |
return modified_img, ""
|
195 |
else:
|
|
|
196 |
return None, text_response
|
197 |
|
198 |
except Exception as e:
|
199 |
raise gr.Error(f"Error: {e}")
|
200 |
|
201 |
#######################################
|
202 |
-
# 4. Gradio 인터페이스
|
203 |
#######################################
|
204 |
|
205 |
with gr.Blocks(title="Flux + Google GenAI Text Replacement") as demo:
|
206 |
gr.Markdown(
|
207 |
"""
|
208 |
-
# Flux
|
209 |
-
|
210 |
-
**Usage
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
- If `<text>` is **not** found, the text will be appended automatically as `with clear readable text that says ...`.
|
218 |
-
4. (Optional) If you want to change the text again, use the "Change Text in Image" button.
|
219 |
|
220 |
---
|
221 |
"""
|
@@ -223,32 +231,32 @@ with gr.Blocks(title="Flux + Google GenAI Text Replacement") as demo:
|
|
223 |
|
224 |
with gr.Row():
|
225 |
with gr.Column():
|
226 |
-
gr.Markdown("## 1)
|
227 |
prompt_input = gr.Textbox(
|
228 |
lines=3,
|
229 |
-
label="Prompt (
|
230 |
-
placeholder="e.g. A white cat says <text>
|
231 |
)
|
232 |
text_input = gr.Textbox(
|
233 |
lines=1,
|
234 |
-
label="
|
235 |
-
placeholder="e.g. 안녕"
|
236 |
)
|
237 |
-
with gr.Accordion("
|
238 |
height = gr.Slider(label="Height", minimum=256, maximum=1152, step=64, value=512)
|
239 |
width = gr.Slider(label="Width", minimum=256, maximum=1152, step=64, value=512)
|
240 |
steps = gr.Slider(label="Inference Steps", minimum=6, maximum=25, step=1, value=8)
|
241 |
-
scale = gr.Slider(label="Guidance Scale", minimum=0.0, maximum=
|
242 |
seed = gr.Number(label="Seed (reproducibility)", value=1234, precision=0)
|
243 |
|
244 |
generate_btn = gr.Button("Generate Base Image", variant="primary")
|
245 |
-
generated_image = gr.Image(label="Generated Image", type="pil")
|
246 |
|
247 |
with gr.Column():
|
248 |
-
gr.Markdown("## 2)
|
249 |
new_text_input = gr.Textbox(
|
250 |
-
label="
|
251 |
-
placeholder="
|
252 |
)
|
253 |
modify_btn = gr.Button("Change Text in Image via Gemini", variant="secondary")
|
254 |
output_img = gr.Image(label="Modified Image", type="pil")
|
|
|
78 |
- file_name: 원본 이미지(예: .png) 경로
|
79 |
- model: 사용할 gemini 모델 이름
|
80 |
"""
|
81 |
+
# (1) 환경 변수에서 API 키 가져오기 (필수)
|
82 |
api_key = os.getenv("GAPI_TOKEN", None)
|
83 |
if not api_key:
|
84 |
raise ValueError(
|
|
|
86 |
"Google GenAI API๋ฅผ ์ฌ์ฉํ๊ธฐ ์ํด์๋ GAPI_TOKEN์ด ํ์ํฉ๋๋ค."
|
87 |
)
|
88 |
|
89 |
+
# (2) Google Client 초기화
|
90 |
client = genai.Client(api_key=api_key)
|
91 |
|
92 |
+
# (3) 이미지 업로드
|
93 |
files = [client.files.upload(file=file_name)]
|
94 |
|
95 |
+
# (4) gemini์ ์ ๋ฌํ Content ์ค๋น (์ด๋ฏธ์ง + ํ๋กฌํํธ)
|
96 |
contents = [
|
97 |
types.Content(
|
98 |
role="user",
|
|
|
106 |
),
|
107 |
]
|
108 |
|
109 |
+
# (5) 생성/변환 설정
|
110 |
generate_content_config = types.GenerateContentConfig(
|
111 |
temperature=1,
|
112 |
top_p=0.95,
|
|
|
119 |
text_response = ""
|
120 |
image_path = None
|
121 |
|
122 |
+
# 임시 파일로 이미지 받을 준비
|
123 |
with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
|
124 |
temp_path = tmp.name
|
125 |
+
# 응답 스트림을 받으면서 이미지/텍스트 구분 처리
|
126 |
for chunk in client.models.generate_content_stream(
|
127 |
model=model,
|
128 |
contents=contents,
|
|
|
132 |
continue
|
133 |
candidate = chunk.candidates[0].content.parts[0]
|
134 |
|
135 |
+
# inline_data๊ฐ ์์ผ๋ฉด ์ด๋ฏธ์ง ์๋ต
|
136 |
if candidate.inline_data:
|
137 |
save_binary_file(temp_path, candidate.inline_data.data)
|
138 |
print(f"File of mime type {candidate.inline_data.mime_type} saved to: {temp_path}")
|
139 |
image_path = temp_path
|
140 |
break
|
141 |
else:
|
142 |
+
# 이미지 없이 텍스트만 반환되는 경우
|
143 |
text_response += chunk.text + "\n"
|
144 |
|
145 |
+
# 업로드한 File 객체 제거
|
146 |
del files
|
147 |
+
|
148 |
return image_path, text_response
|
149 |
|
150 |
#######################################
|
151 |
+
# 3. Gradio 함수
|
152 |
+
# (1) FLUX๋ก ์ด๋ฏธ์ง ์์ฑ -> (2) Google GenAI๋ก ํ
์คํธ ๊ต์ฒด
|
153 |
#######################################
|
154 |
|
155 |
def generate_initial_image(prompt, text, height, width, steps, scale, seed):
|
156 |
"""
|
157 |
+
FLUX 파이프라인을 사용해 '텍스트가 포함된 이미지를' 먼저 생성.
|
158 |
+
- prompt ๋ด <text>๋ฅผ text๋ก ์นํ
|
159 |
+
- <text>가 없다면 "with clear readable text that says '<text>'"를 자동 붙임
|
160 |
"""
|
161 |
if "<text>" in prompt:
|
162 |
combined_prompt = prompt.replace("<text>", text)
|
163 |
else:
|
164 |
combined_prompt = f"{prompt} with clear readable text that says '{text}'"
|
165 |
+
|
166 |
+
# 디버그용: 최종 들어가는 프롬프트를 확인
|
167 |
+
print(f"[DEBUG] Final combined_prompt: {combined_prompt}")
|
168 |
|
169 |
with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16), timer("inference"):
|
170 |
result = pipe(
|
|
|
181 |
|
182 |
def change_text_in_image(original_image, new_text):
|
183 |
"""
|
184 |
+
Google GenAI์ gemini ๋ชจ๋ธ์ ํตํด,
|
185 |
업로드된 이미지 내부의 문구를 `new_text`로 변경해주는 함수.
|
186 |
"""
|
187 |
try:
|
|
|
188 |
with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
|
189 |
original_path = tmp.name
|
190 |
original_image.save(original_path)
|
|
|
196 |
)
|
197 |
|
198 |
if image_path:
|
199 |
+
# Gradio ๊ตฌ๋ฒ์ ์๋ decode_base64_to_image๊ฐ ์์ผ๋ฏ๋ก PIL๋ก ์ฒ๋ฆฌ
|
200 |
with open(image_path, "rb") as f:
|
201 |
image_data = f.read()
|
202 |
modified_img = Image.open(io.BytesIO(image_data))
|
203 |
return modified_img, ""
|
204 |
else:
|
205 |
+
# 이미지가 없이 텍스트만 반환된 경우
|
206 |
return None, text_response
|
207 |
|
208 |
except Exception as e:
|
209 |
raise gr.Error(f"Error: {e}")
|
210 |
|
211 |
#######################################
|
212 |
+
# 4. Gradio 인터페이스 구성
|
213 |
#######################################
|
214 |
|
215 |
with gr.Blocks(title="Flux + Google GenAI Text Replacement") as demo:
|
216 |
gr.Markdown(
|
217 |
"""
|
218 |
+
# Flux ๊ธฐ๋ฐ ์ด๋ฏธ์ง ์์ฑ + Google GenAI๋ฅผ ํตํ ํ
์คํธ ๋ณํ
|
219 |
+
|
220 |
+
**Usage**:
|
221 |
+
- You can include `<text>` in the prompt. For example:
|
222 |
+
`white cat with speech bubble says <text>`
|
223 |
+
- Then, type the actual text in "Text to Include in the Image" (ex: "Hello" or "안녕").
|
224 |
+
- If `<text>` is not found in your prompt, the text will be automatically appended as:
|
225 |
+
`with clear readable text that says '<text>'`.
|
226 |
+
- Finally, you can optionally change the text again via Gemini.
|
|
|
|
|
227 |
|
228 |
---
|
229 |
"""
|
|
|
231 |
|
232 |
with gr.Row():
|
233 |
with gr.Column():
|
234 |
+
gr.Markdown("## 1) Step 1: FLUX๋ก ํ
์คํธ ํฌํจ ์ด๋ฏธ์ง ์์ฑ")
|
235 |
prompt_input = gr.Textbox(
|
236 |
lines=3,
|
237 |
+
label="이미지 장면/배경 Prompt (use `<text>` placeholder if you like)",
|
238 |
+
placeholder="e.g. A white cat with speech bubble says <text>"
|
239 |
)
|
240 |
text_input = gr.Textbox(
|
241 |
lines=1,
|
242 |
+
label="이미지 안에 들어갈 텍스트",
|
243 |
+
placeholder="e.g. Hello or 안녕"
|
244 |
)
|
245 |
+
with gr.Accordion("고급 설정 (확장)", open=False):
|
246 |
height = gr.Slider(label="Height", minimum=256, maximum=1152, step=64, value=512)
|
247 |
width = gr.Slider(label="Width", minimum=256, maximum=1152, step=64, value=512)
|
248 |
steps = gr.Slider(label="Inference Steps", minimum=6, maximum=25, step=1, value=8)
|
249 |
+
scale = gr.Slider(label="Guidance Scale", minimum=0.0, maximum=10.0, step=0.5, value=3.5)
|
250 |
seed = gr.Number(label="Seed (reproducibility)", value=1234, precision=0)
|
251 |
|
252 |
generate_btn = gr.Button("Generate Base Image", variant="primary")
|
253 |
+
generated_image = gr.Image(label="Generated Image (with text)", type="pil")
|
254 |
|
255 |
with gr.Column():
|
256 |
+
gr.Markdown("## 2) Step 2: 생성된 이미지 내 텍스트 수정")
|
257 |
new_text_input = gr.Textbox(
|
258 |
+
label="새로 바꿀 텍스트",
|
259 |
+
placeholder="예) Hello world"
|
260 |
)
|
261 |
modify_btn = gr.Button("Change Text in Image via Gemini", variant="secondary")
|
262 |
output_img = gr.Image(label="Modified Image", type="pil")
|