# Streamlit demo app: IRRA text-to-image person retrieval.
import streamlit as st
from st_pages import Page, show_pages, add_page_title, Section
from lib.utils.model import get_model, get_similarities
from lib.utils.timer import timer
# Render this page's title and register the app's multi-page navigation.
add_page_title()
show_pages(
    [
        # Fixed user-facing typo: "Retrival" -> "Retrieval".
        Page('app.py', 'IRRA Text-To-Image-Retrieval'),
        Section('Implementation Details'),
        Page('pages/losses.py', 'Loss functions'),
    ]
)
# Intro blurb linking the paper the model implements.
st.markdown('''
A text-to-image retrieval model implemented from [arXiv: Cross-Modal Implicit Relation Reasoning and Aligning for Text-to-Image Person Retrieval](https://arxiv.org/abs/2303.12501).
The uploaded images should be `384x128` with only one person in the shot.
''')

st.header('Inputs')
caption = st.text_input('Description Input')
images = st.file_uploader('Upload images', accept_multiple_files=True)
# With accept_multiple_files=True the uploader returns a list (possibly empty),
# so `is not None` was always true; test truthiness instead so an empty upload
# renders no preview and the guard also covers a None return defensively.
if images:
    st.image(images)  # type: ignore

st.header('Options')
st.subheader('Ranks', help='How many predictions the model is allowed to make')
ranks = st.slider('slider_ranks', min_value=1, max_value=10, label_visibility='collapsed', value=5)
# Disabled until there is at least one image AND a non-empty caption.
# Truthiness (`not images`) is None-safe where `len(images) == 0` would raise.
button = st.button('Match most similar', disabled=not images or caption == '')
if button:
    st.header('Results')

    # Load (or fetch cached) model weights; report the parameter count in millions.
    with st.spinner('Loading model'):
        model = get_model()
    st.text(f'IRRA model loaded with {sum(p.numel() for p in model.parameters()) / 1e6:.0f}M parameters')

    # NOTE(review): removed a dead `time = timer()` assignment here — the
    # context-manager form below is the one actually used for timing.
    with st.spinner('Computing and ranking similarities'):
        with timer() as t:
            # Similarity of the caption against every uploaded image;
            # squeeze(0) drops the single-caption batch dimension.
            similarities = get_similarities(caption, images, model).squeeze(0)
        elapsed = t()

    # Indices of the top-`ranks` images, most similar first.
    indices = similarities.argsort(descending=True).cpu().tolist()[:ranks]

    # Header row of the results table.
    c1, c2, c3 = st.columns(3)
    with c1:
        st.subheader('Rank')
    with c2:
        st.subheader('Image')
    with c3:
        st.subheader('Cosine Similarity', help='Due to the nature of the SDM loss, the higher the similarity, the more similar the match is')

    # One row per ranked match: position, image, similarity score.
    for i, idx in enumerate(indices):
        c1, c2, c3 = st.columns(3)
        with c1:
            st.text(f'{i + 1}')
        with c2:
            st.image(images[idx])
        with c3:
            st.text(f'{similarities[idx].cpu():.2f}')

    st.success(f'Done in {elapsed:.2f}s')
# Sidebar: app title plus links to the paper and both reference implementations.
with st.sidebar:
    # Fixed user-facing typo: "Retrival" -> "Retrieval".
    st.title('IRRA Text-To-Image Retrieval')
    st.subheader('Useful Links')
    st.markdown('[arXiv: Cross-Modal Implicit Relation Reasoning and Aligning for Text-to-Image Person Retrieval](https://arxiv.org/abs/2303.12501)')
    st.markdown('[IRRA implementation (Pytorch Lightning + Transformers)](https://github.com/grostaco/modern-IRRA)')
    st.markdown('[IRRA implementation (PyTorch)](https://github.com/anosorae/IRRA/tree/main)')