AminFaraji committed on
Commit
064423d
·
verified ·
1 Parent(s): 297bc3e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -0
app.py CHANGED
@@ -52,7 +52,26 @@ model = model.eval()
52
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
53
  print(f"Model device: {model.device}")
54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
 
56
 
57
 
58
 
 
52
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
53
  print(f"Model device: {model.device}")
54
 
55
+ # Custom SentenceTransformer-backed embedding wrapper, passed to SemanticChunker below
56
+ from sentence_transformers import SentenceTransformer
57
+ from langchain_experimental.text_splitter import SemanticChunker
58
+ from typing import List
59
+
60
+
61
class MyEmbeddings:
    """Embedding adapter backed by a SentenceTransformer model.

    Exposes ``embed_documents`` and ``embed_query`` so an instance can be
    handed to components that call those two methods (this file passes it
    to ``SemanticChunker``).
    """

    def __init__(
        self, model_name: str = "sentence-transformers/all-MiniLM-L6-v2"
    ) -> None:
        """Load the sentence-transformer model used for all embeddings.

        Args:
            model_name: Identifier or path handed to ``SentenceTransformer``.
                Defaults to the model the original code hard-coded.
        """
        self.model = SentenceTransformer(model_name)

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed several texts.

        Args:
            texts: The strings to embed.

        Returns:
            One embedding (a list of floats) per input text, in input order.
            An empty input yields an empty list.
        """
        batch = list(texts)
        if not batch:
            # Nothing to encode; skip the model call entirely.
            return []
        # Encode the whole batch in one call instead of one call per text so
        # the model can batch the work internally.
        return self.model.encode(batch).tolist()

    def embed_query(self, query: str) -> List[float]:
        """Embed a single query string.

        Args:
            query: The text to embed.

        Returns:
            The embedding of ``query`` as a flat list of floats.
        """
        # Encoding a one-element list returns a 2-D result; row 0 is the
        # embedding for the query.
        return self.model.encode([query])[0].tolist()
70
+
71
+
72
+ embeddings = MyEmbeddings()
73
 
74
+ splitter = SemanticChunker(embeddings)
75
 
76
 
77