File size: 1,136 Bytes
99637f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer
import arxiv

# Fetch arXiv papers
def fetch_arxiv_papers(query, max_results=10):
    """Search arXiv and return the matching papers as plain dictionaries.

    Args:
        query: Search string passed to ``arxiv.Search``.
        max_results: Upper bound on the number of results requested.

    Returns:
        A list of dicts, one per result, each with the keys ``"title"``,
        ``"summary"`` and ``"pdf_url"``. Results are requested sorted by
        submission date.
    """
    arxiv_client = arxiv.Client()
    request = arxiv.Search(
        query=query,
        max_results=max_results,
        sort_by=arxiv.SortCriterion.SubmittedDate,
    )

    # Keep only the fields needed downstream instead of the full result objects.
    papers = []
    for entry in arxiv_client.results(request):
        record = {
            "title": entry.title,
            "summary": entry.summary,
            "pdf_url": entry.pdf_url,
        }
        papers.append(record)
    return papers

# Build and save FAISS index
def build_faiss_index(papers, index_file="faiss_index.index", *,
                      model_name="all-MiniLM-L6-v2"):
    """Embed paper summaries and write a flat L2 FAISS index to disk.

    Args:
        papers: Iterable of dicts, each providing a ``"summary"`` string.
        index_file: Path the FAISS index is written to.
        model_name: Name of the SentenceTransformer model used to embed the
            summaries.

    Raises:
        ValueError: If ``papers`` yields no items; there is nothing to index
            and the embedding dimension could not be derived from the data.
    """
    # Materialize first so one-shot iterables work and emptiness can be
    # checked before paying the cost of loading the embedding model.
    summaries = [paper["summary"] for paper in papers]
    if not summaries:
        raise ValueError("Cannot build a FAISS index from an empty list of papers")

    embedder = SentenceTransformer(model_name)
    # FAISS expects a C-contiguous float32 matrix of shape (n, dimension).
    paper_embeddings = np.ascontiguousarray(
        embedder.encode(summaries), dtype=np.float32
    )

    # Create FAISS index
    dimension = paper_embeddings.shape[1]
    index = faiss.IndexFlatL2(dimension)
    index.add(paper_embeddings)

    # Save index to disk
    faiss.write_index(index, index_file)
    print(f"FAISS index saved to {index_file}")

# Example usage
if __name__ == "__main__":
    # Demo run: fetch papers for a sample topic and index their summaries
    # using the default index file name.
    build_faiss_index(fetch_arxiv_papers("quantum computing"))