# Pedro Gengo
# Adding app
# 1b69bf3
# raw
# history blame
# 1.27 kB
import faiss
import gradio as gr
import numpy as np
import pandas as pd
import torch.nn.functional as F
from sentence_transformers import SentenceTransformer
# Width of the vectors stored in the FAISS index; query embeddings are
# truncated to this many components before searching.
DIM = 768

# trust_remote_code=True allows sentence-transformers to execute the custom
# model code that ships with this checkpoint.
model = SentenceTransformer("nomic-ai/nomic-embed-text-v1.5", trust_remote_code=True)

# `read_csv` has no `index` keyword (passing one raises TypeError);
# `index_col=None` is the supported way to say "no column is the row index".
papers_df = pd.read_csv(
    "data/cvpr2024_papers_with_details.csv",
    index_col=None,
    on_bad_lines="skip",
)
# Keep only the papers that have both a summary and a PDF path.
papers_df = papers_df[papers_df["summary"].notna() & papers_df["pdf_path"].notna()]

# NOTE(review): search hits are mapped back to papers by row position, so row i
# of the embeddings file is presumably the i-th row of the filtered frame
# above — confirm against the script that produced the file.
with open("data/embeddings.npy", "rb") as f:
    embeddings = np.load(f)
# FAISS operates on C-contiguous float32 matrices.
embeddings = np.ascontiguousarray(embeddings, dtype=np.float32)

# Exact (brute-force) L2 index over all paper embeddings.
index = faiss.IndexFlatL2(DIM)
index.add(embeddings)
def encode_query(query):
    """Embed a single query string for lookup in the FAISS index.

    The raw model output is layer-normalised over its feature dimension,
    truncated to the first ``DIM`` components and then L2-normalised.

    Args:
        query: Text to embed.

    Returns:
        A 2-D ``torch.Tensor`` holding one row (the query) with at most
        ``DIM`` columns.
    """
    # NOTE(review): the nomic-embed model card describes task prefixes such as
    # "search_query: "; whether one belongs here depends on how the stored
    # document embeddings were produced — confirm before adding it.
    query_embeddings = model.encode([query], convert_to_tensor=True)
    # Post-process the query tensor itself, not the module-level corpus
    # `embeddings` array: using the latter would discard the encoded query.
    query_embeddings = F.layer_norm(
        query_embeddings, normalized_shape=(query_embeddings.shape[1],)
    )
    query_embeddings = query_embeddings[:, :DIM]
    query_embeddings = F.normalize(query_embeddings, p=2, dim=1)
    return query_embeddings
def search_nearest_papers(query, k=5):
    """Return the papers whose embeddings are closest to ``query``.

    Args:
        query: Free-text search string.
        k: Number of neighbours to request. Coerced to ``int`` because UI
            widgets may deliver the value as a float.

    Returns:
        A DataFrame with the ``title``, ``summary`` and ``pdf_path`` columns
        of the matching rows of ``papers_df``, in the order FAISS returned
        them (nearest first).
    """
    k = int(k)
    query_embeddings = encode_query(query)
    # FAISS needs a host-side NumPy array; the encoder may hand back a tensor
    # that lives on an accelerator.
    query_matrix = query_embeddings.detach().cpu().numpy()
    _distances, neighbours = index.search(query_matrix, k)
    # FAISS pads the result with -1 when fewer than k vectors are available;
    # `iloc` would silently interpret -1 as "last row", so drop those slots.
    positions = [int(i) for i in neighbours[0] if i >= 0]
    return papers_df.iloc[positions][["title", "summary", "pdf_path"]]
# Web UI: a free-text query box plus a slider choosing how many results to
# show; the results are rendered as a table.
demo = gr.Interface(
    search_nearest_papers,
    [
        "text",
        # `gr.inputs.Slider(..., default=...)` is the legacy namespace that
        # Gradio 4 removed; `gr.Slider(..., value=...)` is its replacement.
        # step=1 keeps the slider on whole numbers, since it feeds `k`.
        gr.Slider(minimum=1, maximum=10, value=5, step=1),
    ],
    "dataframe",
)

# Start the server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()