englissi committed on
Commit
dfd8ef3
·
verified ·
1 Parent(s): 16ac547

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -38
app.py CHANGED
@@ -1,48 +1,57 @@
import gradio as gr
from transformers import pipeline

# BLIP base checkpoint: turns an input image into a raw English caption.
_CAPTION_MODEL = "Salesforce/blip-image-captioning-base"
captioner = pipeline("image-to-text", model=_CAPTION_MODEL)
7
def generate_caption(image, style):
    """Caption *image* with BLIP, then format the result for the chosen exam *style*."""
    result = captioner(image)

    # Normalize the pipeline output to a plain string — the return shape
    # varies across transformers versions ([{"generated_text": ...}],
    # [{"text": ...}], or a bare list of strings).
    if isinstance(result, list) and result:
        head = result[0]
        if isinstance(head, dict):
            caption = head.get("generated_text") or head.get("text") or ""
        else:
            caption = str(head)
    else:
        caption = str(result)
    caption = caption.strip()

    # Map the requested test format onto the raw caption.
    if style == "TOEIC Speaking Part 1":
        return f"Q: What do you see in the picture?\nA: {caption.capitalize()}."
    if style == "IELTS Describe a Photo":
        return (
            "Describe the photo in two sentences:\n"
            f"1. {caption.capitalize()}.\n"
            "2. It also shows the context of daily life."
        )
    return caption
 
 
 
 
 
 
33
 
34
with gr.Blocks() as demo:
    # NOTE(review): Korean UI strings below were mojibake in the scrape;
    # restored to the intended UTF-8 text.
    gr.Markdown("## 📸 이미지 캡셔닝 → English Test 스타일 문장 생성")
    with gr.Row():
        image_input = gr.Image(type="pil", label="Upload Image")
        style_selector = gr.Dropdown(
            choices=["Raw Caption", "TOEIC Speaking Part 1", "IELTS Describe a Photo"],
            value="TOEIC Speaking Part 1",
            label="시험 형식 선택",
        )
    caption_output = gr.Textbox(label="Generated Caption", lines=4)
    generate_btn = gr.Button("Generate")
    generate_btn.click(
        fn=generate_caption,
        inputs=[image_input, style_selector],
        outputs=caption_output,
    )

if __name__ == "__main__":
    demo.launch()
 
import gradio as gr
from transformers import pipeline

# Model checkpoints for the two inference pipelines below.
_CAPTION_MODEL = "Salesforce/blip-image-captioning-base"
_SCENE_MODEL = "openai/clip-vit-base-patch32"

# BLIP: produces a raw English caption for an input image.
captioner = pipeline("image-to-text", model=_CAPTION_MODEL)

# CLIP zero-shot classifier: scores candidate scene labels for an image.
scene_classifier = pipeline("zero-shot-image-classification", model=_SCENE_MODEL)
9
 
10
# One sentence tail per CLIP scene label. The dict's insertion order also
# fixes the candidate-label order handed to the zero-shot classifier.
_SCENE_SENTENCES = {
    "outdoor": "It looks like a pleasant outdoor setting, and the subject seems relaxed.",
    "indoor": "It appears to be indoors, perhaps at home or in an office environment.",
    "beach": "It seems to be on a beach, and the atmosphere looks warm and sunny.",
    "office": "It looks like an office scene, with people engaged in work or discussion.",
    "street": "The scene appears to be on a busy street, with vehicles and pedestrians.",
    "restaurant": "It looks like a restaurant setting, where people are dining together.",
    "park": "The location seems to be a park, with trees and open space.",
    "sports": "It appears to be a sports activity, showing movement and action.",
    "kitchen": "It seems to be in a kitchen, with cooking utensils visible.",
    "mountain": "The background looks like mountains, suggesting a hiking scene.",
}

# Candidate labels for the zero-shot scene classifier.
SCENE_LABELS = list(_SCENE_SENTENCES)

# Full response templates: common lead-in + scene-specific tail.
TEMPLATES = {
    label: "In this picture, {caption}. " + tail
    for label, tail in _SCENE_SENTENCES.items()
}
24
+
25
def generate_caption(images, choice_index):
    """Caption the selected uploaded image and wrap it in a scene-specific template.

    Args:
        images: list of uploaded images from Gradio (file paths or PIL images;
            transformers pipelines accept either). May be None/empty if the
            user has not uploaded anything yet.
        choice_index: index of the image to describe, as a string or int
            (the UI dropdown supplies "0".."3").

    Returns:
        A "TOEIC Part 1"-style description string, or a user-facing error
        message when no image (or no image at that index) is available.
    """
    # Guard: the dropdown always offers 0-3, independent of how many images
    # were actually uploaded — fail gracefully instead of raising.
    if not images:
        return "Please upload at least one image first."
    idx = int(choice_index)
    if not 0 <= idx < len(images):
        return f"Image index {idx} is out of range; {len(images)} image(s) uploaded."
    img = images[idx]

    # 1) Raw caption. The pipeline's return shape varies across transformers
    #    versions, so normalize defensively (the previous revision of this
    #    function did; calling .get() on a non-dict would raise).
    out = captioner(img)
    first = out[0] if isinstance(out, list) and out else out
    if isinstance(first, dict):
        raw = first.get("generated_text") or first.get("text") or ""
    else:
        raw = str(first)
    raw = raw.strip()

    # 2) Zero-shot scene classification; labels come back ranked by score,
    #    so the first label is the best match.
    cls = scene_classifier(img, candidate_labels=SCENE_LABELS)
    scene = cls["labels"][0]

    # 3) Fill the scene template, with a neutral fallback for unmapped labels.
    template = TEMPLATES.get(scene, "In this picture, {caption}.")
    return template.format(caption=raw)
42
 
43
with gr.Blocks() as demo:
    # NOTE(review): Korean UI strings below were mojibake in the scrape;
    # restored to the intended UTF-8 text.
    gr.Markdown("## 📸 TOEIC Part 1: 상황별 사진 묘사")
    # BUG FIX: gr.Files only accepts type="filepath" or "binary" — the
    # original type="pil" is rejected by Gradio at build time. File paths
    # work fine because transformers pipelines accept paths directly.
    img_inputs = gr.Files(
        file_count="multiple",
        type="filepath",
        label="Upload up to 4 images",
    )
    choice = gr.Dropdown(
        choices=[str(i) for i in range(4)],
        value="0",
        label="Which image to describe? (0–3)",
    )
    btn = gr.Button("Describe")
    output = gr.Textbox(label="TOEIC Part 1 Response", lines=4)
    btn.click(fn=generate_caption, inputs=[img_inputs, choice], outputs=output)

if __name__ == "__main__":
    demo.launch()