Update app.py
Browse files
app.py
CHANGED
@@ -177,7 +177,6 @@ def phonetic_match(text, query, method='levenshtein_distance', apply_phonetic=Tr
|
|
177 |
return jellyfish.levenshtein_distance(text_phonetic, query_phonetic)
|
178 |
return 0
|
179 |
|
180 |
-
#def optimize_query(query, llm_model):
|
181 |
def optimize_query(
|
182 |
query: str,
|
183 |
llm_model: str = "meta-llama/Llama-3.2-1B",
|
@@ -185,10 +184,10 @@ def optimize_query(
|
|
185 |
embedding_model: str = "sentence-transformers/all-MiniLM-L6-v2",
|
186 |
vector_store_type: str = "faiss",
|
187 |
search_type: str = "similarity",
|
188 |
-
top_k: int =
|
189 |
) -> List[str]:
|
190 |
# Initialize the language model
|
191 |
-
#llm = HuggingFacePipeline(model=llm_model)
|
192 |
|
193 |
# Create a temporary vector store for query optimization
|
194 |
temp_vector_store = get_vector_store(vector_store_type, chunks, embedding_model)
|
@@ -202,10 +201,11 @@ def optimize_query(
|
|
202 |
llm=llm
|
203 |
)
|
204 |
|
205 |
-
#
|
206 |
-
optimized_queries = multi_query_retriever.invoke(query)
|
|
|
|
|
207 |
|
208 |
-
return optimized_queries
|
209 |
|
210 |
|
211 |
def create_custom_embedding(texts, model_type='word2vec', vector_size=100, window=5, min_count=1):
|
|
|
177 |
return jellyfish.levenshtein_distance(text_phonetic, query_phonetic)
|
178 |
return 0
|
179 |
|
|
|
180 |
def optimize_query(
|
181 |
query: str,
|
182 |
llm_model: str = "meta-llama/Llama-3.2-1B",
|
|
|
184 |
embedding_model: str = "sentence-transformers/all-MiniLM-L6-v2",
|
185 |
vector_store_type: str = "faiss",
|
186 |
search_type: str = "similarity",
|
187 |
+
top_k: int = 3 # Reduce top_k for quicker test
|
188 |
) -> List[str]:
|
189 |
# Initialize the language model
|
190 |
+
#llm = HuggingFacePipeline(pipeline(model=llm_model))
|
191 |
|
192 |
# Create a temporary vector store for query optimization
|
193 |
temp_vector_store = get_vector_store(vector_store_type, chunks, embedding_model)
|
|
|
201 |
llm=llm
|
202 |
)
|
203 |
|
204 |
+
# Limit max time or set a timeout for LLM to avoid endless execution
|
205 |
+
optimized_queries = multi_query_retriever.invoke(query, max_time=30) # Timeout in seconds
|
206 |
+
|
207 |
+
return optimized_queries
|
208 |
|
|
|
209 |
|
210 |
|
211 |
def create_custom_embedding(texts, model_type='word2vec', vector_size=100, window=5, min_count=1):
|