imagetalking / app.py
import gradio as gr
from transformers import pipeline

# 1) Initialize the image-captioning pipeline
captioner = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
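# Note: instantiating the pipeline downloads the BLIP weights on first run.
# Optionally, a device argument (e.g. device=0) can be passed to pipeline()
# above to run inference on a GPU if one is available.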

def generate_caption(image, style):
    # 1) Get the raw caption from the model
    output = captioner(image)
    # Example output: [{"generated_text": "..."}], [{"text": "..."}], or ["..."]
    # Extract the caption defensively
    if isinstance(output, list) and output:
        first = output[0]
        if isinstance(first, dict):
            raw_caption = first.get("generated_text") or first.get("text") or ""
        else:
            raw_caption = str(first)
    else:
        raw_caption = str(output)
    raw_caption = raw_caption.strip()

    # 2) Apply the selected style
    if style == "TOEIC Speaking Part 1":
        return f"Q: What do you see in the picture?\nA: {raw_caption.capitalize()}."
    elif style == "IELTS Describe a Photo":
        return (
            "Describe the photo in two sentences:\n"
            f"1. {raw_caption.capitalize()}.\n"
            "2. It also shows the context of daily life."
        )
    else:
        return raw_caption
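
# Illustrative usage sketch (not executed by the app; "sample.jpg" is a
# hypothetical local image file):
#   from PIL import Image
#   print(generate_caption(Image.open("sample.jpg"), "TOEIC Speaking Part 1"))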

with gr.Blocks() as demo:
    gr.Markdown("## 📸 Image Captioning → English Test Style Sentence Generation")
    with gr.Row():
        img_in = gr.Image(type="pil", label="Upload Image")
        style_sel = gr.Dropdown(
            choices=["Raw Caption", "TOEIC Speaking Part 1", "IELTS Describe a Photo"],
            value="TOEIC Speaking Part 1",
            label="Select test format",
        )
    output = gr.Textbox(label="Generated Caption", lines=4)
    btn = gr.Button("Generate")
    btn.click(fn=generate_caption, inputs=[img_in, style_sel], outputs=output)

if __name__ == "__main__":
    demo.launch()
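
# To try the app locally (assuming gradio and transformers are installed):
#   python app.py
# Gradio then prints a local URL to open in a browser.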