Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -52,7 +52,26 @@ model = model.eval()
|
|
52 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
|
53 |
print(f"Model device: {model.device}")
|
54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
|
|
|
56 |
|
57 |
|
58 |
|
|
|
52 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
|
53 |
print(f"Model device: {model.device}")
|
54 |
|
55 |
+
# Custom embedding wrapper backed by a sentence-transformers model.
|
56 |
+
from sentence_transformers import SentenceTransformer
|
57 |
+
from langchain_experimental.text_splitter import SemanticChunker
|
58 |
+
from typing import List
|
59 |
+
|
60 |
+
|
61 |
+
class MyEmbeddings:
    """Thin embedding adapter around a ``SentenceTransformer`` model.

    Provides the ``embed_documents`` / ``embed_query`` method pair and
    converts the model output to plain Python lists of floats.
    """

    def __init__(
        self,
        model_name: str = "sentence-transformers/all-MiniLM-L6-v2",
    ) -> None:
        """Load the sentence-transformers model named *model_name*.

        Args:
            model_name: Identifier passed to ``SentenceTransformer``.
                Defaults to the model this file originally hard-coded.
        """
        self.model = SentenceTransformer(model_name)

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Return one embedding per entry of *texts*, in input order.

        Args:
            texts: The strings to embed.

        Returns:
            A list with one list of floats per input text; an empty list
            when *texts* is empty.
        """
        batch = list(texts)
        if not batch:
            # Nothing to encode: skip the model call entirely.
            return []
        # One batched encode call instead of one model call per text.
        return self.model.encode(batch).tolist()

    def embed_query(self, query: str) -> List[float]:
        """Return the embedding of a single query string.

        Args:
            query: The string to embed.

        Returns:
            The embedding as a flat list of floats.
        """
        # Encoding a bare string yields a single vector, so no list
        # wrapping and re-indexing is needed.
        return self.model.encode(query).tolist()
|
70 |
+
|
71 |
+
|
72 |
+
# Module-level embedding wrapper; constructing it loads the SentenceTransformer model.
embeddings = MyEmbeddings()
|
73 |
|
74 |
+
# Build the SemanticChunker text splitter, handing it the embeddings object defined earlier in this file.
splitter = SemanticChunker(embeddings)
|
75 |
|
76 |
|
77 |
|