Spaces:
Runtime error
Runtime error
Update Similarity.py
Browse files- Similarity.py +15 -3
Similarity.py
CHANGED
@@ -1,11 +1,21 @@
|
|
1 |
import nltk
|
2 |
nltk.data.path.append("./nltk_data")
|
3 |
from sentence_transformers import SentenceTransformer, util
|
|
|
4 |
class Similarity:
|
5 |
def __init__(self):
|
6 |
-
self.model =
|
7 |
-
#
|
8 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
def chunk_text(self, text, chunk_size=1400, overlap_size=200):
|
11 |
sentences = nltk.sent_tokenize(text)
|
@@ -23,6 +33,8 @@ class Similarity:
|
|
23 |
return chunks
|
24 |
|
25 |
def get_sim_text(self, text, claim_embedding, min_threshold=0.4, chunk_size=1500):
|
|
|
|
|
26 |
if not text:
|
27 |
return []
|
28 |
|
|
|
1 |
import nltk
|
2 |
nltk.data.path.append("./nltk_data")
|
3 |
from sentence_transformers import SentenceTransformer, util
|
4 |
+
|
5 |
class Similarity:
|
6 |
def __init__(self):
    """Set up the instance without loading the embedding model.

    ``self.model`` starts as ``None``. The NLTK sentence-tokenizer data is
    looked up and, when missing, downloaded into ``./nltk_data``.
    """
    self.model = None
    # Download tokenizer data once; skipped when already present.
    # Older NLTK releases read `punkt`, while newer ones load `punkt_tab`
    # from `sent_tokenize`, so make sure both are available.
    for resource in ('punkt', 'punkt_tab'):
        try:
            nltk.data.find('tokenizers/' + resource)
        except LookupError:
            nltk.download(resource, download_dir='./nltk_data')
|
13 |
+
|
14 |
+
def load_model(self):
    """Create the SentenceTransformer model on first call.

    Does nothing when ``self.model`` is already set; otherwise it builds the
    model, stores it on ``self.model`` and prints progress messages.
    """
    if self.model is not None:
        # Already loaded by an earlier call.
        return
    print("Loading SentenceTransformer model...")
    self.model = SentenceTransformer("lighteternal/stsb-xlm-r-greek-transfer")
    print("Model loaded.")
|
19 |
|
20 |
def chunk_text(self, text, chunk_size=1400, overlap_size=200):
|
21 |
sentences = nltk.sent_tokenize(text)
|
|
|
33 |
return chunks
|
34 |
|
35 |
def get_sim_text(self, text, claim_embedding, min_threshold=0.4, chunk_size=1500):
|
36 |
+
self.load_model()
|
37 |
+
|
38 |
if not text:
|
39 |
return []
|
40 |
|