File size: 901 Bytes
80645ea 92cc33a 80645ea 92cc33a 80645ea |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 |
from transformers import AutoTokenizer, AutoModelForVision2Seq
import gradio as gr
from PIL import Image
# ε θ½½ Qwen2-VL-7B 樑ε
MODEL_NAME = "Qwen/Qwen2-VL-7B"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForVision2Seq.from_pretrained(MODEL_NAME)
def generate_response(image_path, text_prompt):
image = Image.open(image_path)
inputs = tokenizer(text_prompt, return_tensors="pt").to(model.device)
vision_inputs = model.processor(images=image, return_tensors="pt").to(model.device)
outputs = model.generate(**vision_inputs, **inputs)
return tokenizer.decode(outputs[0], skip_special_tokens=True)
# Gradio ηι’
iface = gr.Interface(
fn=generate_response,
inputs=[gr.Image(type="filepath"), gr.Textbox(label="Text Prompt")],
outputs="text",
title="Qwen2-VL-7B Image + Text Generator"
)
if __name__ == "__main__":
iface.launch()
|