Spaces:
Running
on
T4
Running
on
T4
Use correct preprocessing: prefix the query with "query: " and each paragraph with "passage: " before computing embeddings
Browse files
app.py
CHANGED
|
@@ -62,6 +62,12 @@ def create_hnsw_index(embeddings_np, space='ip', ef_construction=100, M=16):
|
|
| 62 |
index.add_items(embeddings_np, ids)
|
| 63 |
return index
|
| 64 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
app = FastAPI()
|
| 66 |
|
| 67 |
class EmbeddingsSimilarityReq(BaseModel):
|
|
@@ -74,7 +80,8 @@ async def find_similar_paragraphsitem(req: EmbeddingsSimilarityReq):
|
|
| 74 |
print("Len of batches", len(req.paragraphs))
|
| 75 |
|
| 76 |
print("creating embeddings", current_timestamp())
|
| 77 |
-
|
|
|
|
| 78 |
query_embedding, chunks_embeddings = embeddings_np[0], embeddings_np[1:]
|
| 79 |
|
| 80 |
print("creating index", current_timestamp())
|
|
|
|
| 62 |
index.add_items(embeddings_np, ids)
|
| 63 |
return index
|
| 64 |
|
| 65 |
+
def preprocess_texts(query, paragraphs):
|
| 66 |
+
query = f'query: {query}'
|
| 67 |
+
paragraphs = [f'passage: {p}' for p in paragraphs]
|
| 68 |
+
return [query]+paragraphs
|
| 69 |
+
|
| 70 |
+
|
| 71 |
app = FastAPI()
|
| 72 |
|
| 73 |
class EmbeddingsSimilarityReq(BaseModel):
|
|
|
|
| 80 |
print("Len of batches", len(req.paragraphs))
|
| 81 |
|
| 82 |
print("creating embeddings", current_timestamp())
|
| 83 |
+
inputs = preprocess_texts(req.query, req.paragraphs)
|
| 84 |
+
embeddings_np = get_embeddings(inputs)
|
| 85 |
query_embedding, chunks_embeddings = embeddings_np[0], embeddings_np[1:]
|
| 86 |
|
| 87 |
print("creating index", current_timestamp())
|