Chris4K committed on
Commit
74a765b
·
verified ·
1 Parent(s): bf514c4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -3
app.py CHANGED
@@ -72,7 +72,13 @@ class EmbeddingModel:
72
  # Embed the list of chunks
73
  return self.model.embed_documents(chunks)
74
 
75
- def process_files(model_name, split_strategy, chunk_size=500, overlap_size=50, max_tokens=None):
 
 
 
 
 
 
76
  # File processing
77
  text = ""
78
  for file in os.listdir(FILES_DIR):
@@ -90,7 +96,7 @@ def process_files(model_name, split_strategy, chunk_size=500, overlap_size=50, m
90
  # Embed chunks, not the full text
91
  model = EmbeddingModel(MODELS[model_name], max_tokens=max_tokens)
92
  embeddings = model.embed(chunks)
93
-
94
  return embeddings, chunks
95
 
96
  def search_embeddings(query, model_name, top_k):
@@ -133,7 +139,15 @@ def upload_file(file, model_name, split_strategy, overlap_size,chunk_size, max_
133
  overlap_size = int(overlap_size) # Convert to int if valid
134
  except ValueError:
135
  return {"error": "Chunk size and overlap size must be valid integers."}
136
-
 
 
 
 
 
 
 
 
137
 
138
  # Handle file upload using Gradio file object
139
  file_path = file.name # Get the file path from Gradio file object
 
72
  # Embed the list of chunks
73
  return self.model.embed_documents(chunks)
74
 
75
+ def process_files(model_name, split_strategy, chunk_size, overlap_size, max_tokens):
76
+ print('-----mmm--------')
77
+ print(model_name)
78
+ print(split_strategy)
79
+ print(overlap_size)
80
+ print(chunk_size)
81
+ print(max_tokens)
82
  # File processing
83
  text = ""
84
  for file in os.listdir(FILES_DIR):
 
96
  # Embed chunks, not the full text
97
  model = EmbeddingModel(MODELS[model_name], max_tokens=max_tokens)
98
  embeddings = model.embed(chunks)
99
+ print(chunks)
100
  return embeddings, chunks
101
 
102
  def search_embeddings(query, model_name, top_k):
 
139
  overlap_size = int(overlap_size) # Convert to int if valid
140
  except ValueError:
141
  return {"error": "Chunk size and overlap size must be valid integers."}
142
+ print('-------------')
143
+ print(file.name)
144
+ print(model_name)
145
+ print(split_strategy)
146
+ print(overlap_size)
147
+ print(chunk_size)
148
+ print(max_tokens)
149
+ print(query)
150
+ print(top_k)
151
 
152
  # Handle file upload using Gradio file object
153
  file_path = file.name # Get the file path from Gradio file object