# (Scraped page header, not code) Spaces: Sleeping
import arxiv
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
# Fetch arXiv papers
def fetch_arxiv_papers(query, max_results=10):
    """Search arXiv and return the matching papers as plain dictionaries.

    Args:
        query: Search string forwarded to ``arxiv.Search``.
        max_results: Maximum number of results requested from the search.

    Returns:
        A list with one dict per result, each holding the result's
        ``"title"``, ``"summary"`` and ``"pdf_url"`` attributes. Results are
        requested with ``arxiv.SortCriterion.SubmittedDate`` ordering.
    """
    api = arxiv.Client()
    request = arxiv.Search(
        query=query,
        max_results=max_results,
        sort_by=arxiv.SortCriterion.SubmittedDate,
    )

    collected = []
    for entry in api.results(request):
        # Keep only the three fields the rest of the script relies on.
        record = {
            "title": entry.title,
            "summary": entry.summary,
            "pdf_url": entry.pdf_url,
        }
        collected.append(record)
    return collected
# Build and save FAISS index
def build_faiss_index(papers, index_file="faiss_index.index", model_name="all-MiniLM-L6-v2"):
    """Embed paper summaries and write a flat L2 FAISS index to disk.

    Args:
        papers: Iterable of mappings; each must provide a ``"summary"`` entry,
            which is the text that gets embedded.
        index_file: Destination path for the serialized index. Passed to
            ``faiss.write_index`` as a string, so path-like objects work too.
        model_name: Name of the SentenceTransformer model used for embedding.
            Defaults to the model this script has always used.

    Raises:
        ValueError: If ``papers`` yields no entries. Without at least one
            embedded summary there is no embedding dimension to size the
            index with.
    """
    summaries = [paper["summary"] for paper in papers]
    if not summaries:
        # Fail early with a clear message, before loading the model, instead
        # of crashing later on ``shape[1]`` of an empty embedding array.
        raise ValueError("papers must contain at least one entry to build a FAISS index")

    embedder = SentenceTransformer(model_name)
    # NOTE(review): faiss's Python bindings are documented to expect float32,
    # C-contiguous input; coerce defensively in case the encoder returns
    # another dtype or layout.
    paper_embeddings = np.ascontiguousarray(embedder.encode(summaries), dtype=np.float32)

    # Create FAISS index sized to the embedding width.
    dimension = paper_embeddings.shape[1]
    index = faiss.IndexFlatL2(dimension)
    index.add(paper_embeddings)

    # Save index to disk
    faiss.write_index(index, str(index_file))
    print(f"FAISS index saved to {index_file}")
# Example usage
if __name__ == "__main__":
    query = "quantum computing"
    papers = fetch_arxiv_papers(query)
    if not papers:
        # build_faiss_index needs at least one paper to derive the embedding
        # dimension, so stop with a clear message instead of a traceback.
        raise SystemExit(f"No arXiv results for query {query!r}; FAISS index not built.")
    build_faiss_index(papers)