Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,37 +1,78 @@
|
|
|
|
1 |
from PIL import Image
|
2 |
-
import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
|
4 |
def generate_caption(filepaths, choice_index):
|
5 |
try:
|
|
|
6 |
idx = int(choice_index)
|
7 |
img_path = filepaths[idx]
|
8 |
-
print(f"[DEBUG] Selected image path: {img_path}")
|
9 |
-
|
10 |
-
# 1) ์ด๋ฏธ์ง ๋ก๋
|
11 |
img = Image.open(img_path).convert("RGB")
|
12 |
-
print("[DEBUG] Image loaded")
|
13 |
|
14 |
-
# 2) ์บก์
์์ฑ
|
15 |
out = captioner(img)
|
16 |
-
print(f"[DEBUG] captioner output: {out!r}")
|
17 |
first = out[0] if isinstance(out, list) else out
|
18 |
raw = first.get("generated_text") or first.get("text") or str(first)
|
19 |
raw = raw.strip()
|
20 |
-
print(f"[DEBUG] raw caption: {raw!r}")
|
21 |
|
22 |
# 3) ์ฅ๋ฉด ๋ถ๋ฅ
|
23 |
cls = scene_classifier(img, candidate_labels=SCENE_LABELS)
|
24 |
-
print(f"[DEBUG] scene_classifier output: {cls!r}")
|
25 |
scene = cls["labels"][0]
|
26 |
|
27 |
-
# 4) ํ
ํ๋ฆฟ ๋งคํ
|
28 |
template = TEMPLATES.get(scene, "In this picture, {caption}.")
|
29 |
-
|
30 |
-
print(f"[DEBUG] Final result: {result}")
|
31 |
-
return result
|
32 |
|
33 |
except Exception as e:
|
34 |
-
#
|
35 |
-
|
36 |
-
|
37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os
from PIL import Image
import gradio as gr
from transformers import pipeline

# 1) Initialize the Hugging Face pipelines once at module load.
#    NOTE(review): model weights download on first run — assumes network access.
captioner = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
scene_classifier = pipeline(
    "zero-shot-image-classification",
    model="openai/clip-vit-base-patch32"
)

# 2) Scene templates: one TOEIC Part-1-style sentence frame per scene label.
#    {caption} is filled with the raw BLIP caption by generate_caption().
TEMPLATES = {
    "outdoor": "In this picture, {caption}. It looks like a pleasant outdoor setting, and the subject seems relaxed.",
    "indoor": "In this picture, {caption}. It appears to be indoors, perhaps at home or in an office environment.",
    "beach": "In this picture, {caption}. It seems to be on a beach, and the atmosphere looks warm and sunny.",
    "office": "In this picture, {caption}. It looks like an office scene, with people engaged in work or discussion.",
    "street": "In this picture, {caption}. The scene appears to be on a busy street, with vehicles and pedestrians.",
    "restaurant": "In this picture, {caption}. It looks like a restaurant setting, where people are dining together.",
    "park": "In this picture, {caption}. The location seems to be a park, with trees and open space.",
    "sports": "In this picture, {caption}. It appears to be a sports activity, showing movement and action.",
    "kitchen": "In this picture, {caption}. It seems to be in a kitchen, with cooking utensils visible.",
    "mountain": "In this picture, {caption}. The background looks like mountains, suggesting a hiking scene."
}

# Candidate labels for the zero-shot classifier, derived from TEMPLATES so the
# two can never drift out of sync (dict preserves insertion order, so the list
# is identical to the previous hard-coded one).
SCENE_LABELS = list(TEMPLATES)
30 |
|
def generate_caption(filepaths, choice_index):
    """Describe the selected uploaded image in TOEIC Part 1 style.

    Args:
        filepaths: list of image file paths (gr.Files with type="filepath"
            passes plain path strings).
        choice_index: index of the image to describe; a string like "0"-"3"
            from the dropdown, or anything int() accepts.

    Returns:
        A full-sentence English description built from a scene template,
        or a "🔴 Error: ..." string on failure.
    """
    try:
        # 1) Validate the selection up front so the user sees a clear message
        #    instead of a raw IndexError / TypeError from the generic handler.
        if not filepaths:
            return "🔴 Error: no images uploaded."
        idx = int(choice_index)
        if not 0 <= idx < len(filepaths):
            return f"🔴 Error: image index {idx} is out of range (0-{len(filepaths) - 1})."
        img_path = filepaths[idx]
        img = Image.open(img_path).convert("RGB")

        # 2) Generate the raw caption. Pipeline output shape varies by
        #    transformers version, so accept a list of dicts or a bare dict.
        out = captioner(img)
        first = out[0] if isinstance(out, list) else out
        raw = first.get("generated_text") or first.get("text") or str(first)
        # Drop any trailing period: every template already follows {caption}
        # with its own ".", which would otherwise produce "..".
        raw = raw.strip().rstrip(".")

        # 3) Zero-shot scene classification; labels[0] is taken as the top
        #    match (the transformers pipeline returns labels sorted by score).
        cls = scene_classifier(img, candidate_labels=SCENE_LABELS)
        scene = cls["labels"][0]

        # 4) Map the scene to its sentence template and fill in the caption.
        template = TEMPLATES.get(scene, "In this picture, {caption}.")
        return template.format(caption=raw)

    except Exception as e:
        # UI boundary: surface any remaining failure as text in the output
        # box rather than crashing the Gradio callback.
        return f"🔴 Error: {e}"
55 |
+
|
# 5) Gradio interface definition
with gr.Blocks() as demo:
    gr.Markdown("## 📸 TOEIC Part 1: 상황별 사진 묘사")
    # Multiple upload; type="filepath" hands the callback plain path strings.
    img_inputs = gr.Files(
        file_count="multiple",
        type="filepath",
        label="Upload up to 4 images"
    )
    # Dropdown values are strings; generate_caption casts them with int().
    choice = gr.Dropdown(
        choices=[str(i) for i in range(4)],
        value="0",
        label="Which image to describe? (0–3)"
    )
    btn = gr.Button("Describe")
    output = gr.Textbox(label="TOEIC Part 1 Response", lines=4)
    # Wire the button to the captioning function; its return string fills the textbox.
    btn.click(fn=generate_caption, inputs=[img_inputs, choice], outputs=output)
72 |
+
|
# 6) App entry point: bind to all interfaces and honor the host's PORT
#    environment variable (Spaces/containers inject it), defaulting to 7860.
if __name__ == "__main__":
    port = int(os.environ.get("PORT", 7860))
    demo.launch(server_name="0.0.0.0", server_port=port)