Update app.py
Browse files
app.py
CHANGED
@@ -463,34 +463,6 @@ def optimize_vocabulary(texts, vocab_size=10000, min_frequency=2):
|
|
463 |
tokenizer.train_from_iterator(optimized_texts, trainer)
|
464 |
|
465 |
return tokenizer, optimized_texts
|
466 |
-
|
467 |
-
# New preprocessing function
|
468 |
-
def optimize_query(query, llm_model, chunks, embedding_model, vector_store_type, search_type, top_k):
|
469 |
-
# Use a HuggingFace model for text generation
|
470 |
-
#model_id = "google/flan-t5-large"
|
471 |
-
#tokenizer = AutoTokenizer.from_pretrained(model_id)
|
472 |
-
#model = AutoModelForCausalLM.from_pretrained(model_id)
|
473 |
-
#pipe = pipeline(
|
474 |
-
# "text-generation", model=model, tokenizer=tokenizer, max_new_tokens=512
|
475 |
-
#)
|
476 |
-
#llm = HuggingFacePipeline(pipeline=pipe)
|
477 |
-
|
478 |
-
#llm = HuggingFacePipeline(pipeline(model="HuggingFaceH4/zephyr-7b-beta"))
|
479 |
-
|
480 |
-
|
481 |
-
# Create a temporary vector store for query optimization
|
482 |
-
temp_vector_store = get_vector_store(vector_store_type, chunks, embedding_model)
|
483 |
-
|
484 |
-
# Create a retriever with the temporary vector store
|
485 |
-
temp_retriever = get_retriever(temp_vector_store, search_type, {"k": top_k})
|
486 |
-
|
487 |
-
multi_query_retriever = MultiQueryRetriever.from_llm(
|
488 |
-
retriever=temp_retriever,
|
489 |
-
llm=llm
|
490 |
-
)
|
491 |
-
optimized_queries = multi_query_retriever.generate_queries(query)
|
492 |
-
return optimized_queries
|
493 |
-
|
494 |
|
495 |
# New postprocessing function
|
496 |
def rerank_results(results, query, reranker):
|
|
|
463 |
tokenizer.train_from_iterator(optimized_texts, trainer)
|
464 |
|
465 |
return tokenizer, optimized_texts
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
466 |
|
467 |
# New postprocessing function
|
468 |
def rerank_results(results, query, reranker):
|