Ermond committed on
Commit
3b64d74
·
verified ·
1 Parent(s): 2e87247

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -31
app.py CHANGED
@@ -1,32 +1,45 @@
1
- from base64 import b64decode
2
- from io import BytesIO
3
-
4
- import gradio as gr
5
- import spaces
6
- from PIL import Image
7
- from transformers import pipeline
8
-
9
- model = pipeline(
10
- task="zero-shot-object-detection",
11
- model="google/owlvit-large-patch14",
12
- )
13
-
14
-
15
- @spaces.GPU
16
- def predict(base64: str, texts: str):
17
- decoded_img = b64decode(base64)
18
- image_stream = BytesIO(decoded_img)
19
- img = Image.open(image_stream)
20
- predictions = model(img, text_queries=["".join(list(term)).strip() for term in texts.split(",")])
21
- return predictions
22
-
23
-
24
- demo = gr.Interface(
25
- fn=predict,
26
- inputs=[
27
- gr.Text(label="Image (B64)"),
28
- gr.Text(label="Queries", placeholder="A photo of a dog,A photo of a cat")
29
- ],
30
- outputs=gr.JSON(label="Predictions"),
31
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  demo.launch()
 
1
+ # Credits to IDEA Research for the model:
2
+ # https://huggingface.co/IDEA-Research/grounding-dino-tiny
3
+
4
+ from base64 import b64decode
5
+ from io import BytesIO
6
+
7
+ import gradio as gr
8
+ import spaces
9
+ from PIL import Image
10
+ import torch
11
+ from transformers import AutoProcessor, AutoModelForZeroShotObjectDetection
12
+
13
# Hugging Face Hub checkpoint used for zero-shot object detection.
model_id = "IDEA-Research/grounding-dino-tiny"

# Use the GPU when torch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Loaded once at import time so every request reuses the same objects.
processor = AutoProcessor.from_pretrained(model_id)
model = AutoModelForZeroShotObjectDetection.from_pretrained(model_id)
model = model.to(device)
18
+
19
def _normalize_queries(queries: str) -> str:
    """Turn comma-separated queries into a Grounding DINO text prompt.

    Grounding DINO expects lower-case phrases, each terminated by a period
    (for example ``"a cat. a dog."``). Input that is already in that form
    passes through unchanged.

    Raises:
        ValueError: If no non-empty query remains after trimming.
    """
    terms = [term.strip().rstrip(".").strip().lower() for term in queries.split(",")]
    terms = [term for term in terms if term]
    if not terms:
        raise ValueError("At least one non-empty text query is required.")
    return ". ".join(terms) + "."


def _to_jsonable(value):
    """Recursively replace tensor-like objects with plain Python values."""
    if isinstance(value, dict):
        return {key: _to_jsonable(item) for key, item in value.items()}
    if isinstance(value, (list, tuple)):
        return [_to_jsonable(item) for item in value]
    if hasattr(value, "tolist"):
        # torch tensors (and numpy arrays) expose ``tolist``.
        return value.tolist()
    return value


# NOTE(review): the previous revision decorated this function with
# ``@spaces.GPU``; confirm whether the Space runs on ZeroGPU hardware and,
# if so, restore the decorator.
def predict(base64: str, queries: str, box_threshold: float = 0.4, text_threshold: float = 0.3):
    """Detect the queried objects in a base64-encoded image.

    Args:
        base64: The image file contents, base64-encoded.
        queries: Comma-separated phrases to look for (``"a cat, a dog"``), or
            a prompt already in Grounding DINO form (``"a cat. a dog."``).
        box_threshold: Box confidence threshold forwarded to post-processing.
        text_threshold: Text confidence threshold forwarded to post-processing.

    Returns:
        The processor's post-processed detections for the single input image,
        with tensors converted to plain lists so they can be shown as JSON.

    Raises:
        ValueError: If ``queries`` contains no non-empty phrase.
    """
    # Validate the cheap input first so bad requests never reach the model.
    prompt = _normalize_queries(queries)

    # Convert to RGB so RGBA, palette and grayscale images all reach the
    # processor with three channels.
    image = Image.open(BytesIO(b64decode(base64))).convert("RGB")

    inputs = processor(images=image, text=prompt, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model(**inputs)

    results = processor.post_process_grounded_object_detection(
        outputs,
        inputs.input_ids,
        box_threshold=box_threshold,
        text_threshold=text_threshold,
        # PIL reports (width, height); reverse it to (height, width).
        target_sizes=[image.size[::-1]],
    )
    return _to_jsonable(results)
36
+
37
# One input component per ``predict`` parameter, in the same order.
demo = gr.Interface(
    fn=predict,
    inputs=[
        gr.Text(label="Image (B64)"),
        # Grounding DINO prompts are lower-case phrases ending in a period.
        gr.Text(label="Queries", placeholder="a dog. a cat."),
        gr.Slider(minimum=0.0, maximum=1.0, value=0.4, step=0.01, label="Box threshold"),
        gr.Slider(minimum=0.0, maximum=1.0, value=0.3, step=0.01, label="Text threshold"),
    ],
    outputs=gr.JSON(label="Predictions"),
)
45
  demo.launch()