import numpy as np
import faiss
from sentence_transformers import SentenceTransformer
import arxiv


# Fetch arXiv papers
def fetch_arxiv_papers(query, max_results=10):
    """Query arXiv and return basic metadata for the matching papers.

    Args:
        query: Search string passed to ``arxiv.Search``.
        max_results: Maximum number of results requested from arXiv.

    Returns:
        A list of dicts, one per result, with the keys ``"title"``,
        ``"summary"`` and ``"pdf_url"``. Results are requested sorted by
        submission date.
    """
    client = arxiv.Client()
    search = arxiv.Search(
        query=query,
        max_results=max_results,
        sort_by=arxiv.SortCriterion.SubmittedDate,
    )
    return [
        {
            "title": result.title,
            "summary": result.summary,
            "pdf_url": result.pdf_url,
        }
        for result in client.results(search)
    ]


# Build and save FAISS index
def build_faiss_index(papers, index_file="faiss_index.index"):
    """Embed paper summaries and write a flat L2 FAISS index to disk.

    Vectors are added in the same order as ``papers``, so the i-th vector in
    the index corresponds to ``papers[i]``.

    Args:
        papers: Sequence of dicts that each contain a ``"summary"`` string
            (e.g. the output of ``fetch_arxiv_papers``).
        index_file: Path the index is written to.

    Raises:
        ValueError: If ``papers`` is empty; there would be no embeddings from
            which to derive the index dimension.
    """
    summaries = [paper["summary"] for paper in papers]
    if not summaries:
        # Fail early with a clear message instead of an opaque index error
        # when reading the embedding dimension below.
        raise ValueError("Cannot build a FAISS index from an empty list of papers")

    embedder = SentenceTransformer('all-MiniLM-L6-v2')
    # FAISS expects a C-contiguous float32 matrix of shape (n, dimension);
    # this is a no-op when the encoder already returns that layout.
    paper_embeddings = np.ascontiguousarray(
        embedder.encode(summaries), dtype=np.float32
    )

    # Create FAISS index
    dimension = paper_embeddings.shape[1]
    index = faiss.IndexFlatL2(dimension)
    index.add(paper_embeddings)

    # Save index to disk
    faiss.write_index(index, index_file)
    print(f"FAISS index saved to {index_file}")


# Example usage
if __name__ == "__main__":
    query = "quantum computing"
    papers = fetch_arxiv_papers(query)
    build_faiss_index(papers)