Spaces:
Runtime error
Runtime error
import torch | |
import cv2 | |
import gradio as gr | |
import numpy as np | |
from transformers import OwlViTProcessor, OwlViTForObjectDetection | |
import requests | |
# 如果GPU可用,就使用GPU,否则使用CPU | |
if torch.cuda.is_available(): | |
device = torch.device("cuda") | |
else: | |
device = torch.device("cpu") | |
# 从预训练模型"google/owlvit-large-patch14"加载OWL-ViT模型,并将其放置到适当的设备上 | |
model = OwlViTForObjectDetection.from_pretrained("google/owlvit-large-patch14").to(device) | |
model.eval() | |
# 从同一预训练模型中加载处理器 | |
processor = OwlViTProcessor.from_pretrained("google/owlvit-large-patch14") | |
# 定义一个函数来处理图像URL,文本查询和分数阈值 | |
def query_image(img_url, text_queries, score_threshold): | |
# 使用requests库从URL中获取图像 | |
response = requests.get(img_url) | |
response.raise_for_status() | |
arr = np.asarray(bytearray(response.content), dtype=np.uint8) | |
img = cv2.imdecode(arr, -1) # 使用-1来加载原始图像 | |
text_queries = text_queries.split(",") # 将文本查询分割成独立的查询 | |
target_sizes = torch.Tensor([img.shape[:2]]) | |
inputs = processor(text=text_queries, images=img, return_tensors="pt").to(device) # 使用处理器创建模型的输入 | |
with torch.no_grad(): | |
outputs = model(**inputs) # 获取模型的输出 | |
# 将输出转移到CPU上 | |
outputs.logits = outputs.logits.cpu() | |
outputs.pred_boxes = outputs.pred_boxes.cpu() | |
# 使用处理器进行后处理 | |
results = processor.post_process(outputs=outputs, target_sizes=target_sizes) | |
boxes, scores, labels = results[0]["boxes"], results[0]["scores"], results[0]["labels"] | |
font = cv2.FONT_HERSHEY_SIMPLEX | |
# 在图像上绘制边界框并添加标签 | |
for box, score, label in zip(boxes, scores, labels): | |
box = [int(i) for i in box.tolist()] | |
if score >= score_threshold: | |
img = cv2.rectangle(img, box[:2], box[2:], (255,0,0), 5) | |
y = box[3] - 10 if box[3] + 25 > 768 else box[3] + 25 | |
img = cv2.putText( | |
img, text_queries[label], (box[0], y), font, 1, (255,0,0), 2, cv2.LINE_AA | |
) | |
return img | |
description = """ | |
Gradio demo for OWL-ViT. | |
You can use OWL-ViT to query images with text descriptions of any object. | |
To use it, simply provide an image URL and enter comma separated text descriptions of objects you want to query the image for. | |
You can also use the score threshold slider to set a threshold to filter out low probability predictions. | |
""" | |
# 创建一个Gradio界面 | |
demo = gr.Interface( | |
query_image, | |
inputs=["text", "text", gr.Slider(0, 1, value=0.1)], # 修改输入,使其接受URL而不是图像 | |
outputs="image", | |
title="Zero-Shot Object Detection with OWL-ViT", | |
description=description, | |
examples=[], # 设置为一个空列表 | |
) | |
demo.launch() |