Penality committed on
Commit
cccc448
·
verified ·
1 Parent(s): 2319f8b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -3
app.py CHANGED
@@ -4,7 +4,8 @@ import os
4
  import pdfplumber
5
  import requests
6
  import together
7
- from sentence_transformers import SentenceTransformer
 
8
  import faiss
9
  import numpy as np
10
  import re
@@ -54,6 +55,7 @@ def store_document_data(PDF_FILE):
54
 
55
  try:
56
  index = faiss.IndexFlatL2(embedding.shape[1])
 
57
  print(index, index.ntotal)
58
  index_file = "index.bin"
59
  faiss.write_index(index, index_file)
@@ -61,7 +63,7 @@ def store_document_data(PDF_FILE):
61
  with open(index_file, "rb") as f:
62
  response = requests.post(API_URL_EMBEDDINGS, files={"file": f})
63
 
64
- response = requests.post(API_URL_METADATA, json={"doc_index": max(0, doc_index)})
65
 
66
  print("sent")
67
  except requests.exceptions.RequestException as e:
@@ -103,7 +105,8 @@ def clean_text(text):
103
 
104
  def extract_text_from_pdf(pdf_file):
105
  """Extract and clean text from the uploaded PDF."""
106
- print("extracting")
 
107
  try:
108
  with pdfplumber.open(pdf_file) as pdf:
109
  text = " ".join(clean_text(text) for page in pdf.pages if (text := page.extract_text()))
 
4
  import pdfplumber
5
  import requests
6
  import together
7
+ from sentence_transformers import Sentencindex = faiss.IndexFlatL2(embedding.shape[1])
8
+ print(index, index.ntotal)eTransformer
9
  import faiss
10
  import numpy as np
11
  import re
 
55
 
56
  try:
57
  index = faiss.IndexFlatL2(embedding.shape[1])
58
+ index.add(embedding) # Add embedding
59
  print(index, index.ntotal)
60
  index_file = "index.bin"
61
  faiss.write_index(index, index_file)
 
63
  with open(index_file, "rb") as f:
64
  response = requests.post(API_URL_EMBEDDINGS, files={"file": f})
65
 
66
+ response = requests.post(API_URL_METADATA, json={"doc_index": doc_index})
67
 
68
  print("sent")
69
  except requests.exceptions.RequestException as e:
 
105
 
106
  def extract_text_from_pdf(pdf_file):
107
  """Extract and clean text from the uploaded PDF."""
108
+ print("extracting")index = faiss.IndexFlatL2(embedding.shape[1])
109
+ print(index, index.ntotal)
110
  try:
111
  with pdfplumber.open(pdf_file) as pdf:
112
  text = " ".join(clean_text(text) for page in pdf.pages if (text := page.extract_text()))