Penality committed on
Commit
2646d8d
·
verified ·
1 Parent(s): 7f8ac14

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -1
app.py CHANGED
@@ -15,6 +15,7 @@ from flask import jsonify
15
  load_dotenv()
16
 
17
  API_URL_EMBEDDINGS = f"https://e4e5-196-96-202-255.ngrok-free.app/embeddings"
 
18
 
19
  # FAISS index setup
20
  DIM = 768 # Adjust based on the embedding model
@@ -72,6 +73,10 @@ def store_document_data(PDF_FILE):
72
  def retrieve_document(query):
73
  print(f"Retrieving document based on:\n{query}")
74
 
 
 
 
 
75
  # Generate query embedding
76
  query_embedding = embedding_model.encode([query]).astype(np.float32)
77
 
@@ -95,7 +100,7 @@ def clean_text(text):
95
  print("cleaning")
96
  text = unicodedata.normalize("NFKC", text) # Normalize Unicode characters
97
  text = re.sub(r'\s+', ' ', text).strip() # Remove extra spaces and newlines
98
- text = re.sub(r'[^a-zA-Z0-9.,!?;:\'\"()\-]', ' ', text) # Keep basic punctuation
99
  text = re.sub(r'(?i)(page\s*\d+)', '', text) # Remove page numbers
100
  return text
101
 
 
15
  load_dotenv()
16
 
17
  API_URL_EMBEDDINGS = f"https://e4e5-196-96-202-255.ngrok-free.app/embeddings"
18
+ API_URL_METADATA = f"https://e4e5-196-96-202-255.ngrok-free.app/metadata"
19
 
20
  # FAISS index setup
21
  DIM = 768 # Adjust based on the embedding model
 
73
  def retrieve_document(query):
74
  print(f"Retrieving document based on:\n{query}")
75
 
76
+ embeddings_file = response.get(API_URL_EMBEDDINGS)
77
+ metadata_file = response.get(API_URL_METADATA)
78
+
79
+ print(embeddings_file, metadata_file)
80
  # Generate query embedding
81
  query_embedding = embedding_model.encode([query]).astype(np.float32)
82
 
 
100
  print("cleaning")
101
  text = unicodedata.normalize("NFKC", text) # Normalize Unicode characters
102
  text = re.sub(r'\s+', ' ', text).strip() # Remove extra spaces and newlines
103
+ text = re.sub(r'[^a-zA-Z0-9.,!?;:\\"()\-]', ' ', text) # Keep basic punctuation
104
  text = re.sub(r'(?i)(page\s*\d+)', '', text) # Remove page numbers
105
  return text
106