Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -4,7 +4,8 @@ import os
|
|
4 |
import pdfplumber
|
5 |
import requests
|
6 |
import together
|
7 |
-
from sentence_transformers import
|
|
|
8 |
import faiss
|
9 |
import numpy as np
|
10 |
import re
|
@@ -54,6 +55,7 @@ def store_document_data(PDF_FILE):
|
|
54 |
|
55 |
try:
|
56 |
index = faiss.IndexFlatL2(embedding.shape[1])
|
|
|
57 |
print(index, index.ntotal)
|
58 |
index_file = "index.bin"
|
59 |
faiss.write_index(index, index_file)
|
@@ -61,7 +63,7 @@ def store_document_data(PDF_FILE):
|
|
61 |
with open(index_file, "rb") as f:
|
62 |
response = requests.post(API_URL_EMBEDDINGS, files={"file": f})
|
63 |
|
64 |
-
response = requests.post(API_URL_METADATA, json={"doc_index":
|
65 |
|
66 |
print("sent")
|
67 |
except requests.exceptions.RequestException as e:
|
@@ -103,7 +105,8 @@ def clean_text(text):
|
|
103 |
|
104 |
def extract_text_from_pdf(pdf_file):
|
105 |
"""Extract and clean text from the uploaded PDF."""
|
106 |
-
print("extracting")
|
|
|
107 |
try:
|
108 |
with pdfplumber.open(pdf_file) as pdf:
|
109 |
text = " ".join(clean_text(text) for page in pdf.pages if (text := page.extract_text()))
|
|
|
4 |
import pdfplumber
|
5 |
import requests
|
6 |
import together
|
7 |
+
from sentence_transformers import Sentencindex = faiss.IndexFlatL2(embedding.shape[1])
|
8 |
+
print(index, index.ntotal)eTransformer
|
9 |
import faiss
|
10 |
import numpy as np
|
11 |
import re
|
|
|
55 |
|
56 |
try:
|
57 |
index = faiss.IndexFlatL2(embedding.shape[1])
|
58 |
+
index.add(embedding) # Add embedding
|
59 |
print(index, index.ntotal)
|
60 |
index_file = "index.bin"
|
61 |
faiss.write_index(index, index_file)
|
|
|
63 |
with open(index_file, "rb") as f:
|
64 |
response = requests.post(API_URL_EMBEDDINGS, files={"file": f})
|
65 |
|
66 |
+
response = requests.post(API_URL_METADATA, json={"doc_index": doc_index})
|
67 |
|
68 |
print("sent")
|
69 |
except requests.exceptions.RequestException as e:
|
|
|
105 |
|
106 |
def extract_text_from_pdf(pdf_file):
|
107 |
"""Extract and clean text from the uploaded PDF."""
|
108 |
+
print("extracting")index = faiss.IndexFlatL2(embedding.shape[1])
|
109 |
+
print(index, index.ntotal)
|
110 |
try:
|
111 |
with pdfplumber.open(pdf_file) as pdf:
|
112 |
text = " ".join(clean_text(text) for page in pdf.pages if (text := page.extract_text()))
|