imagetalking / app.py
import gradio as gr
from transformers import pipeline

# 1) Initialize the image-captioning pipeline
captioner = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
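# Note: instantiating the pipeline downloads the BLIP weights on first run.
# Optionally, a device argument (e.g. device=0) can be passed to pipeline()
# above to run inference on a GPU if one is available.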

def generate_caption(image, style):
    # 1) Get the raw caption from the model
    output = captioner(image)
    # Example output: [{"generated_text": "..."}], [{"text": "..."}], or ["..."]
    # Extract the caption defensively
    if isinstance(output, list) and output:
        first = output[0]
        if isinstance(first, dict):
            raw_caption = first.get("generated_text") or first.get("text") or ""
        else:
            raw_caption = str(first)
    else:
        raw_caption = str(output)
    raw_caption = raw_caption.strip()

    # 2) Apply the selected style
    if style == "TOEIC Speaking Part 1":
        return f"Q: What do you see in the picture?\nA: {raw_caption.capitalize()}."
    elif style == "IELTS Describe a Photo":
        return (
            "Describe the photo in two sentences:\n"
            f"1. {raw_caption.capitalize()}.\n"
            "2. It also shows the context of daily life."
        )
    else:
        return raw_caption
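
# Illustrative usage sketch (not executed by the app; "sample.jpg" is a
# hypothetical local image file):
#   from PIL import Image
#   print(generate_caption(Image.open("sample.jpg"), "TOEIC Speaking Part 1"))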

with gr.Blocks() as demo:
    gr.Markdown("## 📸 Image Captioning → English Test Style Sentence Generation")
    with gr.Row():
        img_in = gr.Image(type="pil", label="Upload Image")
        style_sel = gr.Dropdown(
            choices=["Raw Caption", "TOEIC Speaking Part 1", "IELTS Describe a Photo"],
            value="TOEIC Speaking Part 1",
            label="Select test format",
        )
    output = gr.Textbox(label="Generated Caption", lines=4)
    btn = gr.Button("Generate")
    btn.click(fn=generate_caption, inputs=[img_in, style_sel], outputs=output)

if __name__ == "__main__":
    demo.launch()
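
# To try the app locally (assuming gradio and transformers are installed):
#   python app.py
# Gradio then prints a local URL to open in a browser.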