Spaces:
Sleeping
Sleeping
File size: 1,136 Bytes
99637f2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer
import arxiv
# Fetch arXiv papers
def fetch_arxiv_papers(query, max_results=10):
    """Search arXiv and return basic metadata for the matching papers.

    Args:
        query: Search string passed to ``arxiv.Search``.
        max_results: Upper bound on results requested from the search.

    Returns:
        A list of dicts, one per result, with the keys ``"title"``,
        ``"summary"`` and ``"pdf_url"`` copied from the result object.
    """
    client = arxiv.Client()
    # Results are requested sorted by submission date.
    search = arxiv.Search(
        query=query,
        max_results=max_results,
        sort_by=arxiv.SortCriterion.SubmittedDate,
    )
    return [
        dict(title=entry.title, summary=entry.summary, pdf_url=entry.pdf_url)
        for entry in client.results(search)
    ]
# Build and save FAISS index
def build_faiss_index(papers, index_file="faiss_index.index"):
    """Embed paper summaries and save a flat L2 FAISS index to disk.

    Args:
        papers: Iterable of dicts, each providing a ``"summary"`` entry
            whose text is embedded.
        index_file: Path the serialized index is written to.

    Raises:
        ValueError: If ``papers`` is empty. There would be no embeddings to
            take the vector dimension from, so no index can be built.
    """
    summaries = [paper["summary"] for paper in papers]
    if not summaries:
        # Fail early with a clear message, before loading the embedding
        # model, instead of an opaque IndexError on ``shape[1]`` below.
        raise ValueError(
            "Cannot build a FAISS index from an empty list of papers"
        )

    embedder = SentenceTransformer('all-MiniLM-L6-v2')
    # FAISS expects a C-contiguous float32 matrix; normalise the encoder
    # output so the add() call does not depend on the encoder's dtype.
    paper_embeddings = np.ascontiguousarray(
        embedder.encode(summaries), dtype=np.float32
    )

    # Create FAISS index sized to the embedding width (one row per paper).
    dimension = paper_embeddings.shape[1]
    index = faiss.IndexFlatL2(dimension)
    index.add(paper_embeddings)

    # Save index to disk
    faiss.write_index(index, index_file)
    print(f"FAISS index saved to {index_file}")
# Example usage
if __name__ == "__main__":
    # Example usage: fetch papers for a sample query and index their
    # summaries into the default index file.
    query = "quantum computing"
    papers = fetch_arxiv_papers(query)
    if not papers:
        # An empty search result cannot be indexed; exit with a clear
        # message (non-zero status) instead of failing inside the builder.
        raise SystemExit(
            f"No arXiv papers found for query {query!r}; nothing to index."
        )
    build_faiss_index(papers)