# Hugging Face Space: TOEIC Part 1 image-description demo (BLIP caption + CLIP scene classification).
import os

import gradio as gr
from PIL import Image
from transformers import pipeline
# 1) Initialize the Hugging Face pipelines.
# BLIP produces a raw English caption for an input image.
captioner = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
# CLIP zero-shot classification scores the image against the scene labels below.
scene_classifier = pipeline(
    "zero-shot-image-classification",
    model="openai/clip-vit-base-patch32"
)
# 2) Scene labels & response templates.
# SCENE_LABELS feeds the CLIP zero-shot classifier; TEMPLATES maps each label
# to a TOEIC Part 1-style English sentence with a {caption} placeholder.
SCENE_LABELS = [
    "outdoor", "indoor", "beach", "office", "street",
    "restaurant", "park", "sports", "kitchen", "mountain"
]
TEMPLATES = {
    "outdoor": "In this picture, {caption}. It looks like a pleasant outdoor setting, and the subject seems relaxed.",
    "indoor": "In this picture, {caption}. It appears to be indoors, perhaps at home or in an office environment.",
    "beach": "In this picture, {caption}. It seems to be on a beach, and the atmosphere looks warm and sunny.",
    "office": "In this picture, {caption}. It looks like an office scene, with people engaged in work or discussion.",
    "street": "In this picture, {caption}. The scene appears to be on a busy street, with vehicles and pedestrians.",
    "restaurant": "In this picture, {caption}. It looks like a restaurant setting, where people are dining together.",
    "park": "In this picture, {caption}. The location seems to be a park, with trees and open space.",
    "sports": "In this picture, {caption}. It appears to be a sports activity, showing movement and action.",
    "kitchen": "In this picture, {caption}. It seems to be in a kitchen, with cooking utensils visible.",
    "mountain": "In this picture, {caption}. The background looks like mountains, suggesting a hiking scene."
}
def generate_caption(image_path):
    """Generate a TOEIC Part 1-style English description of an image.

    Runs the BLIP captioner for a raw caption, classifies the scene with
    CLIP, then renders the caption into the matching scene template.

    Args:
        image_path: Filesystem path to the image (Gradio passes a filepath).

    Returns:
        The formatted description string, or an error message string if
        anything fails (keeps the Gradio UI from crashing).
    """
    try:
        # 1) Load the image; force RGB so both pipelines get 3-channel input.
        img = Image.open(image_path).convert("RGB")
        # 2) Produce the raw caption. Pipeline output shape varies by
        #    version, so probe both "generated_text" and "text" defensively.
        out = captioner(img)
        first = out[0] if isinstance(out, list) else out
        raw = first.get("generated_text") or first.get("text") or str(first)
        raw = raw.strip()
        # 3) Classify the scene; labels are returned best-score first.
        cls = scene_classifier(img, candidate_labels=SCENE_LABELS)
        scene = cls["labels"][0]
        # 4) Map the scene to its template, with a generic fallback.
        template = TEMPLATES.get(scene, "In this picture, {caption}.")
        return template.format(caption=raw)
    except Exception as e:
        # Broad catch is deliberate: surface the error in the UI textbox.
        return f"🔴 Error: {e}"
# 5) Define the Gradio interface: image in -> button -> description out.
with gr.Blocks() as demo:
    # NOTE(review): Korean title text reconstructed from mojibake
    # ("상황별 사진 묘사" = "scene-based photo description") — confirm wording.
    gr.Markdown("## 📸 TOEIC Part 1: 상황별 사진 묘사 (Single Image)")
    # type="filepath" hands generate_caption a path string, not an array.
    img_in = gr.Image(type="filepath", label="Upload an image")
    btn = gr.Button("Describe")
    output = gr.Textbox(label="TOEIC Part 1 Response", lines=4)
    btn.click(fn=generate_caption, inputs=img_in, outputs=output)
# 6) Launch the app.
if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",  # bind all interfaces (needed in Spaces/containers)
        server_port=int(os.environ.get("PORT", 7860))  # honor host-assigned port
    )