Update app.py
Browse files
app.py
CHANGED
@@ -72,7 +72,13 @@ class EmbeddingModel:
|
|
72 |
# Embed the list of chunks
|
73 |
return self.model.embed_documents(chunks)
|
74 |
|
75 |
-
def process_files(model_name, split_strategy, chunk_size
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
# File processing
|
77 |
text = ""
|
78 |
for file in os.listdir(FILES_DIR):
|
@@ -90,7 +96,7 @@ def process_files(model_name, split_strategy, chunk_size=500, overlap_size=50, m
|
|
90 |
# Embed chunks, not the full text
|
91 |
model = EmbeddingModel(MODELS[model_name], max_tokens=max_tokens)
|
92 |
embeddings = model.embed(chunks)
|
93 |
-
|
94 |
return embeddings, chunks
|
95 |
|
96 |
def search_embeddings(query, model_name, top_k):
|
@@ -133,7 +139,15 @@ def upload_file(file, model_name, split_strategy, overlap_size,chunk_size, max_
|
|
133 |
overlap_size = int(overlap_size) # Convert to int if valid
|
134 |
except ValueError:
|
135 |
return {"error": "Chunk size and overlap size must be valid integers."}
|
136 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
137 |
|
138 |
# Handle file upload using Gradio file object
|
139 |
file_path = file.name # Get the file path from Gradio file object
|
|
|
72 |
# Embed the list of chunks
|
73 |
return self.model.embed_documents(chunks)
|
74 |
|
75 |
+
def process_files(model_name, split_strategy, chunk_size, overlap_size, max_tokens):
|
76 |
+
print('-----mmm--------')
|
77 |
+
print(model_name)
|
78 |
+
print(split_strategy)
|
79 |
+
print(overlap_size)
|
80 |
+
print(chunk_size)
|
81 |
+
print(max_tokens)
|
82 |
# File processing
|
83 |
text = ""
|
84 |
for file in os.listdir(FILES_DIR):
|
|
|
96 |
# Embed chunks, not the full text
|
97 |
model = EmbeddingModel(MODELS[model_name], max_tokens=max_tokens)
|
98 |
embeddings = model.embed(chunks)
|
99 |
+
print(chunks)
|
100 |
return embeddings, chunks
|
101 |
|
102 |
def search_embeddings(query, model_name, top_k):
|
|
|
139 |
overlap_size = int(overlap_size) # Convert to int if valid
|
140 |
except ValueError:
|
141 |
return {"error": "Chunk size and overlap size must be valid integers."}
|
142 |
+
print('-------------')
|
143 |
+
print(file.name)
|
144 |
+
print(model_name)
|
145 |
+
print(split_strategy)
|
146 |
+
print(overlap_size)
|
147 |
+
print(chunk_size)
|
148 |
+
print(max_tokens)
|
149 |
+
print(query)
|
150 |
+
print(top_k)
|
151 |
|
152 |
# Handle file upload using Gradio file object
|
153 |
file_path = file.name # Get the file path from Gradio file object
|