Update app.py
app.py CHANGED
@@ -159,14 +159,22 @@ def phonetic_match(text, query, method='levenshtein_distance', apply_phonetic=Tr
         return jellyfish.levenshtein_distance(text_phonetic, query_phonetic)
     return 0
 
-def optimize_query(query, llm_model):
+#def optimize_query(query, llm_model):
+def optimize_query(query, llm_model, chunks, embedding_model, vector_store_type, search_type, top_k):
     llm = HuggingFacePipeline.from_model_id(
-        model_id=
+        model_id=llm_model,
         task="text2text-generation",
-        model_kwargs={"do_sample": True, "temperature": 0
+        model_kwargs={"do_sample": True, "temperature": 0, "max_new_tokens": 64},
     )
+
+    # Create a temporary vector store for query optimization
+    temp_vector_store = get_vector_store(vector_store_type, chunks, embedding_model)
+
+    # Create a retriever with the temporary vector store
+    temp_retriever = get_retriever(temp_vector_store, search_type, {"k": top_k})
+
     multi_query_retriever = MultiQueryRetriever.from_llm(
-        retriever=
+        retriever=temp_retriever,
         llm=llm
     )
     optimized_queries = multi_query_retriever.generate_queries(query)
@@ -453,6 +461,7 @@ def optimize_query(query, llm_model, chunks, embedding_model, vector_store_type,
     )
     optimized_queries = multi_query_retriever.generate_queries(query)
     return optimized_queries
+
 
 # New postprocessing function
 def rerank_results(results, query, reranker):
@@ -723,11 +732,21 @@ Text chunks:
 Provide your suggestions in a Python dictionary format."""
 
     # Use a HuggingFace model for text generation
-    llm = HuggingFacePipeline.from_model_id(
-        model_id="google/flan-t5-large",
-        task="text2text-generation",
-        model_kwargs={"do_sample": True, "temperature": 0.7, "max_new_tokens": 512},
+    model_id = "google/flan-t5-large"
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+    model = AutoModelForCausalLM.from_pretrained(model_id)
+    pipe = pipeline(
+        "text-generation", model=model, tokenizer=tokenizer, max_new_tokens=512
     )
+    llm = HuggingFacePipeline(pipeline=pipe)
+
+
+
+    #llm = HuggingFacePipeline.from_model_id(
+    #    model_id="google/flan-t5-large",
+    #    task="text2text-generation",
+    #    model_kwargs={"do_sample": True, "temperature": 0.7, "max_new_tokens": 512},
+    #)
 
     # Generate suggestions
     suggested_settings = llm.invoke(prompt)
@@ -1305,7 +1324,7 @@ if __name__ == "__main__":
     iface.launch(share=share)
 
 def run_automated_tests_and_analyze(*args):
-    file, query, model_types, model_names, split_strategies, chunk_sizes, overlap_sizes, \
+    file, query, auto_expected_result_input, model_types, model_names, split_strategies, chunk_sizes, overlap_sizes, \
     vector_store_types, search_types, top_k_values, optimize_vocab, use_query_optimization, use_reranking = args
 
     test_params = {
@@ -1327,7 +1346,7 @@ def run_automated_tests_and_analyze(*args):
         'use_reranking': [use_reranking]
     }
 
-    results_df, stats_df = automated_testing(file, query, test_params)
+    results_df, stats_df = automated_testing(file, query, test_params, auto_expected_result_input)
     recommendations = analyze_results(stats_df)
 
     return results_df, stats_df, recommendations
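The reworked optimize_query builds a temporary vector store and retriever through app.py's get_vector_store and get_retriever helpers and hands them to MultiQueryRetriever. One caveat: generate_queries is an internal helper of LangChain's MultiQueryRetriever (recent releases also expect a callback run manager), while the documented entry point retrieves documents for the generated query variants directly. Below is a minimal sketch of that documented pattern, with FAISS, HuggingFaceEmbeddings, and flan-t5-base as stand-ins for app.py's own helpers and model settings (assumptions, not taken from this commit):

```python
# Sketch only: documented MultiQueryRetriever usage. FAISS, HuggingFaceEmbeddings
# and flan-t5-base are stand-ins for app.py's get_vector_store()/get_retriever()
# helpers and model choices, which this diff does not show.
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFacePipeline
from langchain_community.vectorstores import FAISS
from langchain.retrievers.multi_query import MultiQueryRetriever

chunks = ["first text chunk ...", "second text chunk ..."]  # pre-split chunks
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Temporary vector store and retriever, mirroring the diff's temp_* objects
temp_vector_store = FAISS.from_texts(chunks, embeddings)
temp_retriever = temp_vector_store.as_retriever(search_kwargs={"k": 5})

llm = HuggingFacePipeline.from_model_id(
    model_id="google/flan-t5-base",
    task="text2text-generation",
    pipeline_kwargs={"max_new_tokens": 64},
)

multi_query_retriever = MultiQueryRetriever.from_llm(
    retriever=temp_retriever,
    llm=llm,
)

# Public API: returns documents fetched for the LLM-generated query variants
# (use get_relevant_documents() on older LangChain versions).
docs = multi_query_retriever.invoke("What is the main topic of the document?")
```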
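The suggestion block now loads google/flan-t5-large with AutoModelForCausalLM and a "text-generation" pipeline. Flan-T5 is an encoder-decoder (T5) model, so the loading route documented by transformers uses AutoModelForSeq2SeqLM with the "text2text-generation" task; the variant below is a sketch of that route, not part of the commit:

```python
# Sketch only: Flan-T5 is a seq2seq (encoder-decoder) model, so it is loaded
# with AutoModelForSeq2SeqLM and served through a "text2text-generation" pipeline.
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline
from langchain_community.llms import HuggingFacePipeline

model_id = "google/flan-t5-large"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSeq2SeqLM.from_pretrained(model_id)

pipe = pipeline(
    "text2text-generation", model=model, tokenizer=tokenizer, max_new_tokens=512
)
llm = HuggingFacePipeline(pipeline=pipe)

# Same call pattern as in the diff
suggested_settings = llm.invoke("Suggest chunk_size and overlap settings as a Python dict.")
```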
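run_automated_tests_and_analyze now forwards auto_expected_result_input to automated_testing alongside test_params, a dict that maps each setting to a list of candidate values. automated_testing itself is not shown in this diff; a dict of lists like this is typically expanded into a grid of configurations, for example (hypothetical sketch, not app.py's implementation):

```python
# Hypothetical sketch: expand a dict-of-lists such as test_params into one
# configuration per combination. automated_testing's real logic is not in the diff.
from itertools import product

test_params = {
    "chunk_size": [250, 500],
    "overlap_size": [50, 100],
    "top_k": [3, 5],
}

keys = list(test_params)
configs = [dict(zip(keys, combo)) for combo in product(*(test_params[k] for k in keys))]

for config in configs:
    # one retrieval run would be evaluated per configuration
    print(config)
```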