"""Gradio demo: semantic search over CVPR 2024 papers backed by a FAISS index."""

import faiss
import gradio as gr
import numpy as np
import pandas as pd
import torch.nn.functional as F
from sentence_transformers import SentenceTransformer

# Dimensionality of the FAISS index; query embeddings are truncated to this
# many leading components before being searched.
DIM = 768

model = SentenceTransformer("nomic-ai/nomic-embed-text-v1.5", trust_remote_code=True)
print("Model loaded successfully")

papers_df = pd.read_csv(
    "data/cvpr2024_papers_with_details.csv",
    index_col=None,
    on_bad_lines='skip',
)
# Keep only the rows that have both a summary and a PDF path.
papers_df = papers_df[~papers_df["summary"].isna() & ~papers_df["pdf_path"].isna()]
print("Data loaded successfully")

with open('data/embeddings.npy', 'rb') as f:
    embeddings = np.load(f)

# NOTE(review): rows of `embeddings` are assumed to line up positionally with
# the filtered `papers_df` (search results are looked up with `iloc`) --
# confirm against the script that produced data/embeddings.npy.
index = faiss.IndexFlatL2(DIM)
index.add(embeddings)
print("Index loaded successfully")


def encode_query(query):
    """Embed a single query string for lookup in the FAISS index.

    The raw sentence embedding is layer-normalised, truncated to its first
    ``DIM`` components and L2-normalised.

    Args:
        query: Free-text search query.

    Returns:
        A tensor of shape ``(1, n)`` with ``n <= DIM`` holding the
        unit-length query embedding.
    """
    # NOTE(review): the nomic-embed model card recommends prefixing queries
    # with "search_query: ". It is not applied here because SOURCE does not
    # show how the stored document embeddings were produced -- confirm.
    query_embeddings = model.encode([query], convert_to_tensor=True)
    # Every step below must operate on the query tensor, not on the
    # module-level `embeddings` array that holds the corpus vectors.
    query_embeddings = F.layer_norm(
        query_embeddings, normalized_shape=(query_embeddings.shape[1],)
    )
    query_embeddings = query_embeddings[:, :DIM]
    query_embeddings = F.normalize(query_embeddings, p=2, dim=1)
    return query_embeddings


def search_nearest_papers(query, k=5):
    """Return the ``k`` papers whose embeddings are closest to ``query``.

    Args:
        query: Free-text search query.
        k: Number of neighbours to retrieve. UI sliders may deliver it as a
            float, so it is coerced to an integer of at least 1.

    Returns:
        A DataFrame with the ``title``, ``summary`` and ``pdf_path`` columns
        of the matching papers, nearest first.
    """
    k = max(1, int(k))
    query_embeddings = encode_query(query)
    # Move the tensor to host memory and hand FAISS a contiguous float32
    # array; calling .numpy() directly fails for tensors living on a GPU.
    query_array = np.ascontiguousarray(
        query_embeddings.detach().cpu().numpy(), dtype=np.float32
    )
    _distances, neighbour_ids = index.search(query_array, k)
    hits = neighbour_ids[0]
    # FAISS pads with -1 when fewer than k vectors are available; iloc would
    # otherwise interpret -1 as "last row".
    hits = hits[hits >= 0]
    return papers_df.iloc[hits][["title", "summary", "pdf_path"]]


demo = gr.Interface(
    search_nearest_papers,
    [
        "text",
        # `value` is the initial-value keyword of the top-level gr.Slider
        # component; step=1 keeps k a whole number.
        gr.Slider(1, 10, value=5, step=1),
    ],
    "dataframe",
)

if __name__ == "__main__":
    demo.launch()