Spaces:

englissi
/

imagetalking

Sleeping

File size: 1,769 Bytes

import gradio as gr
from transformers import pipeline

# Build the image-captioning pipeline once at module import; generate_caption
# calls this shared object for every image it is given.
captioner = pipeline(
    task="image-to-text",
    model="Salesforce/blip-image-captioning-base",
)

def generate_caption(image, style: str) -> str:
    """Caption an image and wrap the caption in the selected exam format.

    Args:
        image: Image handed unchanged to the module-level ``captioner``
            pipeline. ``None`` is accepted and short-circuits to a hint
            instead of reaching the pipeline.
        style: Name of the output format. ``"TOEIC Speaking Part 1"`` and
            ``"IELTS Describe a Photo"`` select a template; any other value
            (e.g. ``"Raw Caption"``) returns the bare caption.

    Returns:
        The formatted caption text, or a short prompt asking for an image
        when ``image`` is ``None``.
    """
    # Nothing to caption: return a readable hint rather than passing None
    # into the pipeline, which cannot load it as an image.
    if image is None:
        return "Please upload an image first."

    # 1) Get the raw caption from the model.
    output = captioner(image)

    # The result shape is handled defensively: a list of dicts keyed by
    # "generated_text" or "text", a list of plain values, or anything else.
    if isinstance(output, list) and output:
        first = output[0]
        if isinstance(first, dict):
            raw_caption = first.get("generated_text") or first.get("text") or ""
        else:
            raw_caption = str(first)
    else:
        raw_caption = str(output)

    raw_caption = raw_caption.strip()

    # 2) Apply the requested style.
    if style not in ("TOEIC Speaking Part 1", "IELTS Describe a Photo"):
        return raw_caption

    # The templates append their own period, so drop any trailing one first
    # to avoid "..". Upper-case only the first character: str.capitalize()
    # would also lower-case the rest and mangle proper nouns and acronyms.
    body = raw_caption.rstrip(". ")
    sentence = body[:1].upper() + body[1:]

    if style == "TOEIC Speaking Part 1":
        return f"Q: What do you see in the picture?\nA: {sentence}."
    return (
        "Describe the photo in two sentences:\n"
        f"1. {sentence}.\n"
        "2. It also shows the context of daily life."
    )

# Exam formats offered in the dropdown.
_STYLE_CHOICES = ["Raw Caption", "TOEIC Speaking Part 1", "IELTS Describe a Photo"]

with gr.Blocks() as demo:
    gr.Markdown("## 📸 이미지 캡셔닝 → English Test 스타일 문장 생성")

    # Inputs share one row: the picture and the exam-format selector.
    with gr.Row():
        img_in = gr.Image(label="Upload Image", type="pil")
        style_sel = gr.Dropdown(
            label="시험 형식 선택",
            value="TOEIC Speaking Part 1",
            choices=_STYLE_CHOICES,
        )

    # Result box and the button that runs generate_caption on the two inputs.
    output = gr.Textbox(lines=4, label="Generated Caption")
    btn = gr.Button("Generate")
    btn.click(generate_caption, [img_in, style_sel], output)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()