Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -4,7 +4,8 @@ import os
|
|
| 4 |
import pdfplumber
|
| 5 |
import requests
|
| 6 |
import together
|
| 7 |
-
from sentence_transformers import SentenceTransformer
|
|
|
|
| 8 |
import faiss
|
| 9 |
import numpy as np
|
| 10 |
import re
|
|
@@ -54,6 +55,7 @@ def store_document_data(PDF_FILE):
|
|
| 54 |
|
| 55 |
try:
|
| 56 |
index = faiss.IndexFlatL2(embedding.shape[1])
|
|
|
|
| 57 |
print(index, index.ntotal)
|
| 58 |
index_file = "index.bin"
|
| 59 |
faiss.write_index(index, index_file)
|
|
@@ -61,7 +63,7 @@ def store_document_data(PDF_FILE):
|
|
| 61 |
with open(index_file, "rb") as f:
|
| 62 |
response = requests.post(API_URL_EMBEDDINGS, files={"file": f})
|
| 63 |
|
| 64 |
-
response = requests.post(API_URL_METADATA, json={"doc_index":
|
| 65 |
|
| 66 |
print("sent")
|
| 67 |
except requests.exceptions.RequestException as e:
|
|
@@ -103,7 +105,8 @@ def clean_text(text):
|
|
| 103 |
|
| 104 |
def extract_text_from_pdf(pdf_file):
|
| 105 |
"""Extract and clean text from the uploaded PDF."""
|
| 106 |
-
print("extracting")
|
|
|
|
| 107 |
try:
|
| 108 |
with pdfplumber.open(pdf_file) as pdf:
|
| 109 |
text = " ".join(clean_text(text) for page in pdf.pages if (text := page.extract_text()))
|
|
|
|
| 4 |
import pdfplumber
|
| 5 |
import requests
|
| 6 |
import together
|
| 7 |
+
from sentence_transformers import Sentencindex = faiss.IndexFlatL2(embedding.shape[1])
|
| 8 |
+
print(index, index.ntotal)eTransformer
|
| 9 |
import faiss
|
| 10 |
import numpy as np
|
| 11 |
import re
|
|
|
|
| 55 |
|
| 56 |
try:
|
| 57 |
index = faiss.IndexFlatL2(embedding.shape[1])
|
| 58 |
+
index.add(embedding) # Add embedding
|
| 59 |
print(index, index.ntotal)
|
| 60 |
index_file = "index.bin"
|
| 61 |
faiss.write_index(index, index_file)
|
|
|
|
| 63 |
with open(index_file, "rb") as f:
|
| 64 |
response = requests.post(API_URL_EMBEDDINGS, files={"file": f})
|
| 65 |
|
| 66 |
+
response = requests.post(API_URL_METADATA, json={"doc_index": doc_index})
|
| 67 |
|
| 68 |
print("sent")
|
| 69 |
except requests.exceptions.RequestException as e:
|
|
|
|
| 105 |
|
| 106 |
def extract_text_from_pdf(pdf_file):
|
| 107 |
"""Extract and clean text from the uploaded PDF."""
|
| 108 |
+
print("extracting")index = faiss.IndexFlatL2(embedding.shape[1])
|
| 109 |
+
print(index, index.ntotal)
|
| 110 |
try:
|
| 111 |
with pdfplumber.open(pdf_file) as pdf:
|
| 112 |
text = " ".join(clean_text(text) for page in pdf.pages if (text := page.extract_text()))
|