|
import json |
|
import os |
|
import time |
|
import uuid |
|
import tempfile |
|
from PIL import Image |
|
import gradio as gr |
|
import base64 |
|
import mimetypes |
|
import logging |
|
|
|
from google import genai |
|
from google.genai import types |
|
|
|
|
|
# Root logging setup: timestamped records at DEBUG verbosity.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.DEBUG,
)

# Module-level logger shared by every function in this file.
logger = logging.getLogger(__name__)
|
|
|
|
|
def save_binary_file(file_name, data):
    """Write ``data`` to ``file_name`` in binary mode.

    Any existing file at that path is truncated and overwritten. A debug
    record is logged before and after the write.

    Args:
        file_name: Destination path.
        data: Bytes-like payload to write.
    """
    logger.debug(f"Saving binary data to file: {file_name}")

    with open(file_name, "wb") as out_file:
        out_file.write(data)

    logger.debug(f"File saved successfully: {file_name}")
|
|
|
|
|
def generate(text, file_name, api_key, model="gemini-2.0-flash-exp-image-generation"):
    """Upload an image, ask the Gemini model to edit it, and save the result.

    The file at ``file_name`` is uploaded through the Gemini Files API and
    sent to ``model`` together with the ``text`` instruction. The streamed
    response is scanned for inline image data, which is written to a
    temporary file.

    Args:
        text: Editing instruction sent alongside the image.
        file_name: Path of the local image file to upload.
        api_key: Gemini API key. When ``None`` or blank, the
            ``GEMINI_API_KEY`` environment variable is used instead.
        model: Name of the Gemini model to call.

    Returns:
        Path of a temporary file (created with a ``.png`` suffix) holding
        the image bytes returned by the model, or ``None`` when anything
        fails or the model returns no image. The caller owns the returned
        file and is responsible for deleting it.
    """
    logger.debug(f"Starting generate function with text: '{text}', file_name: '{file_name}', model: '{model}'")

    client = None
    uploaded = None
    temp_path = None
    result_path = None

    try:
        # Tolerate api_key=None so the environment fallback is actually reachable.
        provided_key = (api_key or "").strip()
        effective_api_key = provided_key or os.environ.get("GEMINI_API_KEY")
        logger.debug(f"Using API Key: {'Provided' if provided_key else 'From Environment Variable'}")

        if not effective_api_key:
            logger.error("No API key provided or found in environment variable.")
            raise ValueError("API key is required.")

        client = genai.Client(api_key=effective_api_key)
        logger.debug("Gemini client initialized.")

        uploaded = client.files.upload(file=file_name)
        logger.debug(f"File uploaded. URI: {uploaded.uri}, MIME Type: {uploaded.mime_type}")

        contents = [
            types.Content(
                role="user",
                parts=[
                    types.Part.from_uri(
                        file_uri=uploaded.uri,
                        mime_type=uploaded.mime_type,
                    ),
                    types.Part.from_text(text=text),
                ],
            ),
        ]
        logger.debug(f"Content object created: {contents}")

        generate_content_config = types.GenerateContentConfig(
            temperature=1,
            top_p=0.95,
            top_k=40,
            max_output_tokens=8192,
            response_modalities=[
                "image",
                "text",
            ],
            response_mime_type="text/plain",
        )
        logger.debug(f"Generate content config: {generate_content_config}")

        # Reserve a path only; the handle is closed right away and the image
        # bytes are written later by save_binary_file.
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
            temp_path = tmp.name
        logger.debug(f"Temporary file created: {temp_path}")

        response_stream = client.models.generate_content_stream(
            model=model,
            contents=contents,
            config=generate_content_config,
        )

        image_saved = False
        logger.debug("Starting to process response stream...")
        for chunk in response_stream:
            candidates = chunk.candidates
            if not candidates or not candidates[0].content or not candidates[0].content.parts:
                logger.warning("Chunk has no candidates, content, or parts. Skipping.")
                continue

            # Inspect every part: the image is not guaranteed to be the first one.
            for part in candidates[0].content.parts:
                inline_data = part.inline_data
                if inline_data and inline_data.data:
                    # A later image overwrites an earlier one at the same path.
                    save_binary_file(temp_path, inline_data.data)
                    image_saved = True
                    logger.info(f"File of mime type {inline_data.mime_type} saved to: {temp_path} and prompt input :{text}")
                elif part.text:
                    logger.info(f"Received text: {part.text}")
                    print(part.text)

            logger.debug(f"Raw chunk: {chunk}")

        if image_saved:
            result_path = temp_path
        else:
            # Do not hand the caller a path to an empty file.
            logger.error("Model response contained no image data.")

    except Exception:
        logger.exception("An error occurred during generation:")

    finally:
        # Best-effort removal of the remote upload; failure here must not
        # discard an otherwise successful result.
        if client is not None and uploaded is not None:
            try:
                client.files.delete(name=uploaded.name)
                logger.debug("Uploaded files deleted.")
            except Exception:
                logger.warning("Could not delete uploaded file.", exc_info=True)

        # Drop the temporary file whenever it is not being returned.
        if temp_path is not None and result_path is None:
            try:
                os.remove(temp_path)
            except OSError:
                logger.warning(f"Could not remove temporary file: {temp_path}")

    return result_path
|
|
|
|
|
def process_image_and_prompt(composite_pil, prompt, gemini_api_key):
    """Run one image-edit request and return the result for the gallery.

    The input image is written to a temporary PNG, passed to ``generate``
    together with ``prompt``, and the generated file is loaded back into
    memory. Both temporary files are removed before returning.

    Args:
        composite_pil: Input image object exposing a PIL-style ``save``
            method, or ``None`` when no image was supplied.
        prompt: Editing instruction forwarded to ``generate``.
        gemini_api_key: API key forwarded to ``generate``.

    Returns:
        A one-element list holding the edited PIL image (converted to RGB
        when the generated image is RGBA), or an empty list on any failure.
    """
    logger.debug(f"Starting process_image_and_prompt with prompt: '{prompt}'")

    if composite_pil is None:
        # Nothing to edit; report it explicitly instead of relying on an
        # AttributeError being swallowed below.
        logger.error("No input image provided.")
        return []

    composite_path = None
    gemma_edited_image_path = None

    try:
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
            # Record the path before saving so cleanup covers a failed save.
            composite_path = tmp.name
            composite_pil.save(tmp, format="PNG")
        logger.debug(f"Composite image saved to: {composite_path}")

        gemma_edited_image_path = generate(
            text=prompt,
            file_name=composite_path,
            api_key=gemini_api_key,
            model="gemini-2.0-flash-exp-image-generation",
        )

        if not gemma_edited_image_path:
            logger.error("generate function returned None.")
            return []

        logger.debug(f"Image generated at path: {gemma_edited_image_path}")

        # Image.open is lazy; convert()/copy() force the pixel data into
        # memory so the file can be closed and deleted afterwards.
        with Image.open(gemma_edited_image_path) as opened:
            if opened.mode == "RGBA":
                result_img = opened.convert("RGB")
            else:
                result_img = opened.copy()

        return [result_img]

    except Exception:
        logger.exception("Error occurred in process_image_and_prompt")
        return []

    finally:
        # Remove both temporary files regardless of outcome.
        for leftover in (composite_path, gemma_edited_image_path):
            if not leftover:
                continue
            try:
                os.remove(leftover)
            except OSError:
                logger.warning(f"Could not remove temporary file: {leftover}")
|
|
|
|
|
|
|
# Gradio interface: a header, a short instruction line, then two columns.
# The left column holds the inputs (image, API key, prompt, submit button);
# the right column holds the result gallery.
with gr.Blocks() as demo:
    gr.HTML(
        """
        <div style='display: flex; align-items: center; justify-content: center; gap: 20px'>
            <div style="background-color: var(--block-background-fill); border-radius: 8px">
                <img src="https://www.gstatic.com/lamda/images/gemini_favicon_f069958c85030456e93de685481c559f160ea06b.png" style="width: 100px; height: 100px;">
            </div>
            <div>
                <h1></h1>
                <p>ပုံရိပ်တည်းဖြတ်ရန် Gemini</p>
                <p>API Key ကို <a href="https://aistudio.google.com/apikey">ဤနေရာ</a> တွင် ဖန်တီးပါ</p>
            </div>
        </div>
        """
    )
    gr.Markdown("ပုံတစ်ပုံ တင်ပြီး ပုံကိုတည်းဖြတ်ရန် သင်လိုချင်တာကို ရိုက်ထည့်ပါ။")

    with gr.Row():
        # Input column. Components are created in display order.
        with gr.Column():
            image_input = gr.Image(
                label="ပုံတင်ရန်",
                type="pil",
                image_mode="RGBA",
            )
            gemini_api_key = gr.Textbox(
                label="Gemini API Key",
                placeholder="Gemini API Key ထည့်ပါ",
                type="password",
                lines=1,
            )
            prompt_input = gr.Textbox(
                label="သင်လိုချင်တာ",
                placeholder="သင်လိုချင်တာကို ဤနေရာတွင် ရိုက်ထည့်ပါ...",
                lines=2,
            )
            submit_btn = gr.Button("ထုတ်လုပ်ပါ")

        # Output column.
        with gr.Column():
            output_gallery = gr.Gallery(label="ထုတ်လုပ်ပြီးရလဒ်များ")

    # The inputs list order matches the handler's parameter order:
    # (composite_pil, prompt, gemini_api_key).
    submit_btn.click(
        fn=process_image_and_prompt,
        inputs=[image_input, prompt_input, gemini_api_key],
        outputs=output_gallery,
    )
|
|
|
|
|
|
|
# Startup smoke test: push a solid red 100x100 RGBA image through the same
# handler the UI uses, with the key taken from the environment, and log the
# outcome before the app is launched.
dummy_image = Image.new("RGBA", (100, 100), color="red")
dummy_prompt = "Make the image blue"
dummy_api_key = os.environ.get("GEMINI_API_KEY")

logger.info("Calling process_image_and_prompt directly...")
result = process_image_and_prompt(
    composite_pil=dummy_image,
    prompt=dummy_prompt,
    gemini_api_key=dummy_api_key,
)

# An empty list from the handler means the call failed.
if not result:
    logger.error("Direct call failed.")
else:
    logger.info(f"Direct call successful. Result: {result}")

# Start the web app; share=True is passed through to Gradio's launch().
demo.launch(share=True)