Hidayatmahar committed on
Commit
ca32445
·
verified ·
1 Parent(s): 761c00b

Delete create_faiss.py

Browse files
Files changed (1) hide show
  1. create_faiss.py +0 -26
create_faiss.py DELETED
@@ -1,26 +0,0 @@
1
- from datasets import load_dataset
2
- import faiss
3
- from sentence_transformers import SentenceTransformer
4
- import numpy as np
5
-
6
- # Load the US-LegalKit dataset
7
- dataset = load_dataset("macadeliccc/US-LegalKit", split="train")
8
-
9
- # Extract legal text documents
10
- law_data = [item['text'] for item in dataset if 'text' in item]
11
-
12
- # Load embedding model
13
- model = SentenceTransformer("all-MiniLM-L6-v2")
14
-
15
- # Generate embeddings
16
- embeddings = model.encode(law_data, convert_to_numpy=True)
17
-
18
- # Create FAISS index
19
- dimension = embeddings.shape[1]
20
- index = faiss.IndexFlatL2(dimension) # L2 Distance Index
21
- index.add(embeddings) # Add vectors to FAISS index
22
-
23
- # Save FAISS index
24
- faiss.write_index(index, "faiss_index.bin")
25
-
26
- print("✅ FAISS index saved successfully as 'faiss_index.bin'!")