Spaces:

Penality
/

pdf-something

Sleeping

Penality committed on Feb 26

Commit

cccc448

verified ·

1 Parent(s): 2319f8b

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -4,7 +4,8 @@ import os
 import pdfplumber
 import requests
 import together
-from sentence_transformers import SentenceTransformer
 import faiss
 import numpy as np
 import re
@@ -54,6 +55,7 @@ def store_document_data(PDF_FILE):
         try:
             index = faiss.IndexFlatL2(embedding.shape[1])
             print(index, index.ntotal)
             index_file = "index.bin"
             faiss.write_index(index, index_file)
@@ -61,7 +63,7 @@ def store_document_data(PDF_FILE):
             with open(index_file, "rb") as f:
                 response = requests.post(API_URL_EMBEDDINGS, files={"file": f})
-            response = requests.post(API_URL_METADATA, json={"doc_index": max(0, doc_index)})
             print("sent")
         except requests.exceptions.RequestException as e:
@@ -103,7 +105,8 @@ def clean_text(text):
 def extract_text_from_pdf(pdf_file):
     """Extract and clean text from the uploaded PDF."""
-    print("extracting")
     try:
         with pdfplumber.open(pdf_file) as pdf:
             text = " ".join(clean_text(text) for page in pdf.pages if (text := page.extract_text()))

 import pdfplumber
 import requests
 import together
+from sentence_transformers import SentenceTransformer
 import faiss
 import numpy as np
 import re
         try:
             index = faiss.IndexFlatL2(embedding.shape[1])
+            index.add(embedding)  # Add embedding
             print(index, index.ntotal)
             index_file = "index.bin"
             faiss.write_index(index, index_file)
             with open(index_file, "rb") as f:
                 response = requests.post(API_URL_EMBEDDINGS, files={"file": f})
+            response = requests.post(API_URL_METADATA, json={"doc_index": doc_index})
             print("sent")
         except requests.exceptions.RequestException as e:
 def extract_text_from_pdf(pdf_file):
     """Extract and clean text from the uploaded PDF."""
+    print("extracting")
     try:
         with pdfplumber.open(pdf_file) as pdf:
             text = " ".join(clean_text(text) for page in pdf.pages if (text := page.extract_text()))