# imagetalking / app.py
# englissi's picture
# Create app.py
# 6e28cb9 verified
# raw
# history blame
# 1.62 kB
import gradio as gr
from transformers import pipeline
# 1) Initialise the image-captioning pipeline.
# Transformers registers captioning under the "image-to-text" task name;
# "image-captioning" is not a known task and makes pipeline() raise at startup.
captioner = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
# 2) (Optional) style conversion helper
def style_convert(raw_caption, style):
    """Wrap a raw caption in the text template for the selected exam style.

    Args:
        raw_caption: Caption text to format.
        style: "TOEIC Speaking Part 1", "IELTS Describe a Photo", or any
            other value to get the caption back unchanged.

    Returns:
        The formatted prompt/answer text, or ``raw_caption`` itself when
        ``style`` is not one of the two exam styles.
    """
    if style == "TOEIC Speaking Part 1":
        # Part 1 asks "What do you see in the picture?" and expects a
        # one-sentence answer.
        template = "Q: What do you see in the picture?\nA: {}."
    elif style == "IELTS Describe a Photo":
        template = (
            "Describe the photo in two sentences:\n"
            "1. {}.\n"
            "2. It also shows the context of daily life."
        )
    else:
        return raw_caption

    # Drop surrounding whitespace and a trailing period so the template's own
    # "." does not produce "..".
    sentence = raw_caption.strip().rstrip(".").rstrip()
    # Upper-case only the first character: str.capitalize() would also
    # lower-case the rest and mangle proper nouns such as "New York".
    sentence = sentence[:1].upper() + sentence[1:]
    return template.format(sentence)
# 3) Gradio callback
def generate_caption(image, style):
    """Caption the uploaded image and format the caption for the chosen style.

    Args:
        image: Image handed over by the Gradio image input, or ``None`` when
            nothing has been uploaded.
        style: Exam style name forwarded to ``style_convert``.

    Returns:
        The formatted caption text, or a short prompt asking for an image
        when ``image`` is ``None``.
    """
    # Nothing to caption: answer with a hint instead of sending None to the model.
    if image is None:
        return "Please upload an image first."

    # 3.1 Image captioning. Generation options must be passed through
    # generate_kwargs; the pipeline call rejects them as direct keyword arguments.
    outputs = captioner(image, generate_kwargs={"max_length": 30, "num_beams": 3})
    # The image-to-text pipeline stores the caption under "generated_text".
    result = outputs[0]["generated_text"]

    # 3.2 Style conversion
    return style_convert(result, style)
# 4) Gradio Blocks layout
with gr.Blocks() as demo:
    # Heading (Korean): "Image captioning -> English-test style sentence generation".
    # The text was mis-encoded (mojibake) and has been restored to UTF-8.
    gr.Markdown("## 📸 이미지 캡셔닝 → English Test 스타일 문장 생성")
    # NOTE(review): nesting was reconstructed; the image input and the style
    # dropdown are assumed to share this row — confirm against the intended layout.
    with gr.Row():
        img_in = gr.Image(type="pil", label="Upload Image")
        style_sel = gr.Dropdown(
            choices=["Raw Caption", "TOEIC Speaking Part 1", "IELTS Describe a Photo"],
            value="TOEIC Speaking Part 1",
            # Label (Korean): "Choose exam format"; restored from mojibake.
            label="시험 형식 선택",
        )
    output = gr.Textbox(label="Generated Caption", lines=4)
    btn = gr.Button("Generate")
    # Clicking the button runs generate_caption(image, style) and shows its
    # return value in the textbox.
    btn.click(fn=generate_caption, inputs=[img_in, style_sel], outputs=output)

# 5) Launch the app when this file is run as a script
if __name__ == "__main__":
    demo.launch()