Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,37 +1,78 @@
|
|
|
|
1 |
from PIL import Image
|
2 |
-
import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
|
4 |
def generate_caption(filepaths, choice_index):
|
5 |
try:
|
|
|
6 |
idx = int(choice_index)
|
7 |
img_path = filepaths[idx]
|
8 |
-
print(f"[DEBUG] Selected image path: {img_path}")
|
9 |
-
|
10 |
-
# 1) ์ด๋ฏธ์ง ๋ก๋
|
11 |
img = Image.open(img_path).convert("RGB")
|
12 |
-
print("[DEBUG] Image loaded")
|
13 |
|
14 |
-
# 2) ์บก์
์์ฑ
|
15 |
out = captioner(img)
|
16 |
-
print(f"[DEBUG] captioner output: {out!r}")
|
17 |
first = out[0] if isinstance(out, list) else out
|
18 |
raw = first.get("generated_text") or first.get("text") or str(first)
|
19 |
raw = raw.strip()
|
20 |
-
print(f"[DEBUG] raw caption: {raw!r}")
|
21 |
|
22 |
# 3) ์ฅ๋ฉด ๋ถ๋ฅ
|
23 |
cls = scene_classifier(img, candidate_labels=SCENE_LABELS)
|
24 |
-
print(f"[DEBUG] scene_classifier output: {cls!r}")
|
25 |
scene = cls["labels"][0]
|
26 |
|
27 |
-
# 4) ํ
ํ๋ฆฟ ๋งคํ
|
28 |
template = TEMPLATES.get(scene, "In this picture, {caption}.")
|
29 |
-
|
30 |
-
print(f"[DEBUG] Final result: {result}")
|
31 |
-
return result
|
32 |
|
33 |
except Exception as e:
|
34 |
-
#
|
35 |
-
|
36 |
-
|
37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os
from PIL import Image
import gradio as gr
from transformers import pipeline

# 1) Initialize the Hugging Face pipelines once at module load.
#    NOTE(review): model weights download on first run — assumes network access.
captioner = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
scene_classifier = pipeline(
    "zero-shot-image-classification",
    model="openai/clip-vit-base-patch32"
)

# 2) Scene templates: one TOEIC Part-1-style sentence frame per scene label.
#    {caption} is filled with the raw BLIP caption by generate_caption().
TEMPLATES = {
    "outdoor": "In this picture, {caption}. It looks like a pleasant outdoor setting, and the subject seems relaxed.",
    "indoor": "In this picture, {caption}. It appears to be indoors, perhaps at home or in an office environment.",
    "beach": "In this picture, {caption}. It seems to be on a beach, and the atmosphere looks warm and sunny.",
    "office": "In this picture, {caption}. It looks like an office scene, with people engaged in work or discussion.",
    "street": "In this picture, {caption}. The scene appears to be on a busy street, with vehicles and pedestrians.",
    "restaurant": "In this picture, {caption}. It looks like a restaurant setting, where people are dining together.",
    "park": "In this picture, {caption}. The location seems to be a park, with trees and open space.",
    "sports": "In this picture, {caption}. It appears to be a sports activity, showing movement and action.",
    "kitchen": "In this picture, {caption}. It seems to be in a kitchen, with cooking utensils visible.",
    "mountain": "In this picture, {caption}. The background looks like mountains, suggesting a hiking scene."
}

# Candidate labels for the zero-shot classifier, derived from TEMPLATES so the
# two can never drift out of sync (dict preserves insertion order, so the list
# is identical to the previous hard-coded one).
SCENE_LABELS = list(TEMPLATES)
30 |
|
def generate_caption(filepaths, choice_index):
    """Describe the selected uploaded image in TOEIC Part 1 style.

    Args:
        filepaths: list of image file paths (gr.Files with type="filepath"
            passes plain path strings).
        choice_index: index of the image to describe; a string like "0"-"3"
            from the dropdown, or anything int() accepts.

    Returns:
        A full-sentence English description built from a scene template,
        or a "🔴 Error: ..." string on failure.
    """
    try:
        # 1) Validate the selection up front so the user sees a clear message
        #    instead of a raw IndexError / TypeError from the generic handler.
        if not filepaths:
            return "🔴 Error: no images uploaded."
        idx = int(choice_index)
        if not 0 <= idx < len(filepaths):
            return f"🔴 Error: image index {idx} is out of range (0-{len(filepaths) - 1})."
        img_path = filepaths[idx]
        img = Image.open(img_path).convert("RGB")

        # 2) Generate the raw caption. Pipeline output shape varies by
        #    transformers version, so accept a list of dicts or a bare dict.
        out = captioner(img)
        first = out[0] if isinstance(out, list) else out
        raw = first.get("generated_text") or first.get("text") or str(first)
        # Drop any trailing period: every template already follows {caption}
        # with its own ".", which would otherwise produce "..".
        raw = raw.strip().rstrip(".")

        # 3) Zero-shot scene classification; labels[0] is taken as the top
        #    match (the transformers pipeline returns labels sorted by score).
        cls = scene_classifier(img, candidate_labels=SCENE_LABELS)
        scene = cls["labels"][0]

        # 4) Map the scene to its sentence template and fill in the caption.
        template = TEMPLATES.get(scene, "In this picture, {caption}.")
        return template.format(caption=raw)

    except Exception as e:
        # UI boundary: surface any remaining failure as text in the output
        # box rather than crashing the Gradio callback.
        return f"🔴 Error: {e}"
55 |
+
|
# 5) Gradio interface definition
with gr.Blocks() as demo:
    gr.Markdown("## 📸 TOEIC Part 1: 상황별 사진 묘사")
    # Multiple upload; type="filepath" hands the callback plain path strings.
    img_inputs = gr.Files(
        file_count="multiple",
        type="filepath",
        label="Upload up to 4 images"
    )
    # Dropdown values are strings; generate_caption casts them with int().
    choice = gr.Dropdown(
        choices=[str(i) for i in range(4)],
        value="0",
        label="Which image to describe? (0–3)"
    )
    btn = gr.Button("Describe")
    output = gr.Textbox(label="TOEIC Part 1 Response", lines=4)
    # Wire the button to the captioning function; its return string fills the textbox.
    btn.click(fn=generate_caption, inputs=[img_inputs, choice], outputs=output)
72 |
+
|
# 6) App entry point: bind to all interfaces and honor the host's PORT
#    environment variable (Spaces/containers inject it), defaulting to 7860.
if __name__ == "__main__":
    port = int(os.environ.get("PORT", 7860))
    demo.launch(server_name="0.0.0.0", server_port=port)