englissi committed
Commit cd00a55 · verified · 1 parent: dfd8ef3

Update app.py

Files changed (1): app.py +16 -7
app.py CHANGED
@@ -1,6 +1,8 @@
 import gradio as gr
 from transformers import pipeline
+from PIL import Image
 
+# ① Initialize the pipelines
 captioner = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
 scene_classifier = pipeline(
     "zero-shot-image-classification",
@@ -22,28 +24,35 @@ TEMPLATES = {
     "mountain": "In this picture, {caption}. The background looks like mountains, suggesting a hiking scene."
 }
 
-def generate_caption(images, choice_index):
+def generate_caption(filepaths, choice_index):
     idx = int(choice_index)
-    img = images[idx]
+    img_path = filepaths[idx]
 
-    # raw caption
+    # Load the image with PIL
+    img = Image.open(img_path).convert("RGB")
+
+    # 1) Raw caption
     out = captioner(img)
     first = out[0] if isinstance(out, list) else out
     raw = first.get("generated_text") or first.get("text") or str(first)
     raw = raw.strip()
 
-    # scene classification
+    # 2) Scene classification
     cls = scene_classifier(img, candidate_labels=SCENE_LABELS)
     scene = cls["labels"][0]
 
-    # template mapping
+    # 3) Template mapping
     template = TEMPLATES.get(scene, "In this picture, {caption}.")
     return template.format(caption=raw)
 
 with gr.Blocks() as demo:
     gr.Markdown("## 📸 TOEIC Part 1: 상황별 사진 묘사")
-    img_inputs = gr.Files(file_count="multiple", type="pil",
-                          label="Upload up to 4 images")
+    # Upload multiple files, received as filepaths
+    img_inputs = gr.Files(
+        file_count="multiple",
+        type="filepath",
+        label="Upload up to 4 images"
+    )
    choice = gr.Dropdown(
        choices=[str(i) for i in range(4)],
        value="0",