Spaces:
Sleeping
Sleeping
import faiss | |
import gradio as gr | |
import numpy as np | |
import pandas as pd | |
import torch.nn.functional as F | |
from sentence_transformers import SentenceTransformer | |
# Dimensionality of the vectors stored in the FAISS index; query vectors are
# truncated to this many components before searching.
DIM = 768

# trust_remote_code=True lets sentence-transformers run the custom model code
# shipped in the model repository.
model = SentenceTransformer("nomic-ai/nomic-embed-text-v1.5", trust_remote_code=True)

# `read_csv` has no `index` keyword (passing it raises TypeError); the option
# that controls which column becomes the index is `index_col`.
papers_df = pd.read_csv(
    "data/cvpr2024_papers_with_details.csv",
    index_col=None,
    on_bad_lines="skip",
)
# Keep only papers that have both a summary and a PDF path.
papers_df = papers_df[papers_df["summary"].notna() & papers_df["pdf_path"].notna()]

with open("data/embeddings.npy", "rb") as f:
    embeddings = np.load(f)
# FAISS works on C-contiguous float32 matrices; coerce once at load time.
embeddings = np.ascontiguousarray(embeddings, dtype=np.float32)

# NOTE(review): search results are mapped back to `papers_df` by row position,
# which presumes row i of `embeddings` corresponds to row i of the *filtered*
# frame above -- confirm against the script that produced embeddings.npy.
index = faiss.IndexFlatL2(DIM)
index.add(embeddings)
def encode_query(query):
    """Embed a search query as a unit-length vector for the FAISS index.

    The query is encoded with the module-level ``model``; the result is then
    layer-normalised over its feature axis, truncated to the first ``DIM``
    components, and L2-normalised row-wise.

    Args:
        query: The search text to embed.

    Returns:
        A 2-D torch tensor with one row (the single query) and at most
        ``DIM`` columns.
    """
    # NOTE(review): the nomic-embed model card describes task prefixes such as
    # "search_query: " for queries. None is added here because it must match
    # how data/embeddings.npy was produced -- confirm before changing.
    query_embeddings = model.encode([query], convert_to_tensor=True)
    # Every step below must act on the query tensor. The original code used
    # the global corpus matrix `embeddings` here, which discarded the query
    # and handed a NumPy array to torch functions.
    query_embeddings = F.layer_norm(
        query_embeddings, normalized_shape=(query_embeddings.shape[1],)
    )
    query_embeddings = query_embeddings[:, :DIM]
    query_embeddings = F.normalize(query_embeddings, p=2, dim=1)
    return query_embeddings
def search_nearest_papers(query, k=5):
    """Return the papers whose embeddings are closest to ``query``.

    Args:
        query: Free-text search query.
        k: Number of papers to return. Non-integer values (e.g. from a UI
            slider) are truncated to an int; values below 1 are raised to 1.

    Returns:
        A DataFrame with the ``title``, ``summary`` and ``pdf_path`` columns
        of the matching rows of ``papers_df``, in the order FAISS returned
        them. It has fewer than ``k`` rows when the index holds fewer than
        ``k`` vectors.
    """
    # FAISS needs an integer neighbour count; a slider may deliver a float.
    k = max(int(k), 1)

    query_embeddings = encode_query(query)
    # detach + cpu so the conversion also works for CUDA / grad-tracking
    # tensors; FAISS expects a C-contiguous float32 matrix.
    query_matrix = np.ascontiguousarray(
        query_embeddings.detach().cpu().numpy(), dtype=np.float32
    )

    _distances, neighbours = index.search(query_matrix, k)

    # FAISS pads missing results with label -1; `iloc[-1]` would silently
    # return the last paper, so drop those placeholders.
    hits = neighbours[0]
    hits = hits[hits >= 0]
    return papers_df.iloc[hits][["title", "summary", "pdf_path"]]
# Web UI: a text box for the query and a slider for the number of results;
# the matching papers are rendered as a table.
demo = gr.Interface(
    fn=search_nearest_papers,
    inputs=[
        "text",
        # step=1 keeps the result count a whole number; without an explicit
        # step the slider can emit fractional values.
        # NOTE(review): `gr.inputs` is the legacy Gradio namespace; on newer
        # Gradio releases the equivalent is
        # `gr.Slider(1, 10, value=5, step=1)` -- confirm the pinned version.
        gr.inputs.Slider(1, 10, step=1, default=5),
    ],
    outputs="dataframe",
)

# Start the server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()