Spaces:

englissi
/

imagetalking

Sleeping

App Files Files Community

imagetalking / app.py

englissi

Update app.py

73e2c66 verified 13 days ago

raw

history blame contribute delete

2.9 kB

	import os
	from PIL import Image
	import gradio as gr
	from transformers import pipeline

	# 1) Build both Hugging Face pipelines once, at module import.
	#    `captioner` turns an image into a caption; `scene_classifier` ranks
	#    caller-supplied candidate labels against an image.
	captioner = pipeline(
	    task="image-to-text",
	    model="Salesforce/blip-image-captioning-base",
	)
	scene_classifier = pipeline(
	    task="zero-shot-image-classification",
	    model="openai/clip-vit-base-patch32",
	)

	# 2) Scene labels and the sentence template associated with each one.
	# Candidate labels handed to the scene classifier.
	SCENE_LABELS = [
	    "outdoor",
	    "indoor",
	    "beach",
	    "office",
	    "street",
	    "restaurant",
	    "park",
	    "sports",
	    "kitchen",
	    "mountain",
	]

	# One template per scene label; `{caption}` is filled in with the raw caption.
	TEMPLATES = {
	    "outdoor": (
	        "In this picture, {caption}. It looks like a pleasant outdoor setting, and the subject seems relaxed."
	    ),
	    "indoor": (
	        "In this picture, {caption}. It appears to be indoors, perhaps at home or in an office environment."
	    ),
	    "beach": (
	        "In this picture, {caption}. It seems to be on a beach, and the atmosphere looks warm and sunny."
	    ),
	    "office": (
	        "In this picture, {caption}. It looks like an office scene, with people engaged in work or discussion."
	    ),
	    "street": (
	        "In this picture, {caption}. The scene appears to be on a busy street, with vehicles and pedestrians."
	    ),
	    "restaurant": (
	        "In this picture, {caption}. It looks like a restaurant setting, where people are dining together."
	    ),
	    "park": (
	        "In this picture, {caption}. The location seems to be a park, with trees and open space."
	    ),
	    "sports": (
	        "In this picture, {caption}. It appears to be a sports activity, showing movement and action."
	    ),
	    "kitchen": (
	        "In this picture, {caption}. It seems to be in a kitchen, with cooking utensils visible."
	    ),
	    "mountain": (
	        "In this picture, {caption}. The background looks like mountains, suggesting a hiking scene."
	    ),
	}

	def generate_caption(image_path):
	    """Describe an image as a scene-aware sentence built from a template.

	    The image is captioned with ``captioner``, its scene is picked from
	    ``SCENE_LABELS`` with ``scene_classifier``, and the caption is inserted
	    into the matching entry of ``TEMPLATES``.

	    Args:
	        image_path: Path of the image file to describe. ``None`` or an empty
	            value means nothing was supplied.

	    Returns:
	        The formatted description. Failures are never raised; they are
	        returned as a string prefixed with "🔴 Error: " so the caller can
	        display them directly.
	    """
	    # Nothing to describe: answer clearly instead of surfacing an opaque
	    # error from the image loader.
	    if not image_path:
	        return "🔴 Error: no image provided"

	    try:
	        # 1) Load the image. convert() returns an independent RGB copy, so
	        #    the underlying file can be closed as soon as it is made.
	        with Image.open(image_path) as source:
	            img = source.convert("RGB")

	        # 2) Generate the raw caption (accept either a list of results or a
	        #    single result dict).
	        out = captioner(img)
	        first = out[0] if isinstance(out, list) else out
	        raw = first.get("generated_text") or first.get("text") or str(first)
	        raw = raw.strip()

	        # 3) Classify the scene. The zero-shot image classifier yields a list
	        #    of {"score", "label"} dicts ordered best-first; indexing it with
	        #    a "labels" key (the text zero-shot shape) raises TypeError.
	        cls = scene_classifier(img, candidate_labels=SCENE_LABELS)
	        if isinstance(cls, dict):
	            labels = cls.get("labels") or []
	            scene = labels[0] if labels else None
	        else:
	            scene = cls[0]["label"] if cls else None

	        # 4) Map the scene to its template; unknown or missing scenes fall
	        #    back to the plain caption sentence.
	        template = TEMPLATES.get(scene, "In this picture, {caption}.")
	        return template.format(caption=raw)

	    except Exception as e:
	        # UI boundary: report the failure as text rather than crashing.
	        return f"🔴 Error: {e}"

	# 5) Gradio interface: a heading, an image upload, a button, and a text box
	#    that shows whatever generate_caption returns.
	with gr.Blocks() as demo:
	    gr.Markdown("## 📸 TOEIC Part 1: 상황별 사진 묘사 (Single Image)")
	    # type="filepath" hands the callback a path on disk rather than pixels.
	    img_in = gr.Image(label="Upload an image", type="filepath")
	    btn = gr.Button("Describe")
	    output = gr.Textbox(lines=4, label="TOEIC Part 1 Response")
	    # Clicking the button runs the captioner on the uploaded image.
	    btn.click(generate_caption, inputs=img_in, outputs=output)

	# 6) Launch the app when this file is executed as a script.
	if __name__ == "__main__":
	    # The PORT environment variable overrides the default port 7860.
	    listen_port = int(os.environ.get("PORT", 7860))
	    # Bind to 0.0.0.0 so the server listens on every network interface.
	    demo.launch(server_name="0.0.0.0", server_port=listen_port)