Merge branch 'main' of https://github.com/janesjanes/tsbir
- README.md +11 -8
- code/clip/model.py +1 -5
README.md
CHANGED
@@ -1,7 +1,9 @@
 # Image Retrieval with Text and Sketch
-This code is for our 2022 ECCV paper [
+This code is for our 2022 ECCV paper [A Sketch Is Worth a Thousand Words: Image Retrieval with Text and Sketch](https://patsorn.me/projects/tsbir/)
 
-<img src="https://patsorn.me/projects/tsbir/img/teaser_web_mini.jpg" width="
+<img src="https://patsorn.me/projects/tsbir/img/teaser_web_mini.jpg" width="800px"/>
+
+This repo is based on the open_clip implementation from https://github.com/mlfoundations/open_clip
 
 ---------------------
 folder structure
@@ -9,24 +11,25 @@ folder structure
 |---model/ : Contain the trained model*
 |---sketches/ : Contain example query sketch
 |---images/ : Contain 100 randomly sampled images from COCO TBIR benchmark
-|---notebooks/ : Contain the demo ipynb notebook
+|---notebooks/ : Contain the demo ipynb notebook
 |---code/
 |---training/model_configs/ : Contain model config file for the network
 |---clip/ : Contain source code for running the notebook
 
-*
-
-This repo is based on open_clip implementation from https://github.com/mlfoundations/open_clip
+*need to be downloaded first
 
 ## Prerequisites
 - Pytorch
 
 ## Getting Started
 
-Simply
+- Simply open the Jupyter notebook at `notebooks/Retrieval_Demo.ipynb` for an example of how to retrieve images with our model.
+
+- You can use your own set of images and sketches by modifying the `images/` and `sketches/` folders accordingly.
+
+- A Colab version of the notebook is available [[here]](https://colab.research.google.com/)
 
 ## Download Models
-Pre-trained models
 - <a href='https://patsorn.me/projects/tsbir/data/tsbir_model_final.pt' > Pre-trained models </a>
 
 ## Citation
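The Getting Started additions above point at the demo notebook rather than spelling out the retrieval step itself, so here is a minimal sketch of that step for orientation. It is an illustration under assumptions, not the repo's API: the embeddings are random stand-ins for what the model's encoders in `notebooks/Retrieval_Demo.ipynb` would produce, and the embedding width of 512 is an assumed CLIP-style `embed_dim`, not a value confirmed by this diff.

```python
import torch

# Stand-ins for what the demo notebook would produce: one fused (sketch + text)
# query embedding and embeddings for the 100 sampled COCO images. The width of
# 512 is an assumed CLIP-style embed_dim.
query_emb = torch.randn(1, 512)
image_embs = torch.randn(100, 512)

# CLIP-style retrieval: cosine similarity, i.e. a dot product after L2 normalization.
query_emb = query_emb / query_emb.norm(dim=-1, keepdim=True)
image_embs = image_embs / image_embs.norm(dim=-1, keepdim=True)
scores = (query_emb @ image_embs.T).squeeze(0)

# Rank the candidates and keep the best five matches.
top5 = scores.topk(5)
print("top-5 image indices:", top5.indices.tolist())
```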
code/clip/model.py
CHANGED

@@ -237,10 +237,6 @@ class VisualTransformer(nn.Module):
 
         return x
 
-
-from x_transformers.autoregressive_wrapper import AutoregressiveWrapper
-from x_transformers import ViTransformerWrapper, TransformerWrapper, Encoder, Decoder
-
 class CLIP(nn.Module):
     def __init__(self,
                  embed_dim: int,
@@ -503,4 +499,4 @@ def build_model(state_dict: dict, weight_sharing: bool, feature_fusion: str, num
     convert_weights(model)
     #TODO: only do strict=false when loading from state with 'visual2' branch
     model.load_state_dict(state_dict, strict=False)
-    return model.eval()
+    return model.eval()
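For context on the `build_model` hunk, here is a minimal sketch of how the function might be driven, assuming the checkpoint from the README's download link and guessing at the state-dict layout. Only `state_dict`, `weight_sharing`, and `feature_fusion` are visible in the hunk header; the parameter list is truncated after `num`, so the call below is deliberately incomplete and the argument values are placeholders.

```python
import torch

from model import build_model  # i.e. code/clip/model.py

# Checkpoint from the README's download link; whether the weights sit at the
# top level or under a "state_dict" key is an assumption.
checkpoint = torch.load("model/tsbir_model_final.pt", map_location="cpu")
state_dict = checkpoint.get("state_dict", checkpoint)

# Placeholder arguments: the hunk header shows at least one further parameter
# ("num...") that is cut off in this diff, so this call is a sketch only.
model = build_model(state_dict, weight_sharing=False, feature_fusion="concat")
# build_model converts the weights, loads them with strict=False (see the TODO
# in the hunk above), and returns the model in eval mode.
```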