Spaces:
Running
on
T4
Running
on
T4
use correct preprocessing
Browse files
app.py
CHANGED
@@ -62,6 +62,12 @@ def create_hnsw_index(embeddings_np, space='ip', ef_construction=100, M=16):
|
|
62 |
index.add_items(embeddings_np, ids)
|
63 |
return index
|
64 |
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
app = FastAPI()
|
66 |
|
67 |
class EmbeddingsSimilarityReq(BaseModel):
|
@@ -74,7 +80,8 @@ async def find_similar_paragraphsitem(req: EmbeddingsSimilarityReq):
|
|
74 |
print("Len of batches", len(req.paragraphs))
|
75 |
|
76 |
print("creating embeddings", current_timestamp())
|
77 |
-
|
|
|
78 |
query_embedding, chunks_embeddings = embeddings_np[0], embeddings_np[1:]
|
79 |
|
80 |
print("creating index", current_timestamp())
|
|
|
62 |
index.add_items(embeddings_np, ids)
|
63 |
return index
|
64 |
|
65 |
+
def preprocess_texts(query, paragraphs):
    """Prefix the query and each paragraph with its role marker.

    The query is tagged with ``query: `` and every paragraph with
    ``passage: ``. NOTE(review): these look like the input prefixes an
    E5-style embedding model expects -- confirm against the model in use.

    Args:
        query: The search query text.
        paragraphs: Iterable of paragraph texts.

    Returns:
        A new list whose first element is the prefixed query, followed by
        the prefixed paragraphs in their original order.
    """
    prefixed_query = f'query: {query}'
    prefixed_paragraphs = [f'passage: {p}' for p in paragraphs]
    # The query must stay at index 0: the caller splits the resulting
    # embeddings as ``embeddings_np[0]`` / ``embeddings_np[1:]``.
    return [prefixed_query] + prefixed_paragraphs
|
69 |
+
|
70 |
+
|
71 |
app = FastAPI()
|
72 |
|
73 |
class EmbeddingsSimilarityReq(BaseModel):
|
|
|
80 |
print("Len of batches", len(req.paragraphs))
|
81 |
|
82 |
print("creating embeddings", current_timestamp())
|
83 |
+
inputs = preprocess_texts(req.query, req.paragraphs)
|
84 |
+
embeddings_np = get_embeddings(inputs)
|
85 |
query_embedding, chunks_embeddings = embeddings_np[0], embeddings_np[1:]
|
86 |
|
87 |
print("creating index", current_timestamp())
|