|
import faiss |
|
import pickle |
|
import numpy as np |
|
import re |
|
from sentence_transformers import SentenceTransformer |
|
from huggingface_hub import hf_hub_download |
|
from llama_cpp import Llama |
|
|
|
def load_faiss_index(index_path="faiss_index/faiss_index.faiss", doc_path="faiss_index/documents.pkl"):
    """Load the FAISS index and the pickled documents stored next to it.

    Args:
        index_path: Path of the file read with ``faiss.read_index``.
        doc_path: Path of the pickle file holding the documents.

    Returns:
        A ``(index, documents)`` tuple.
    """
    faiss_index = faiss.read_index(index_path)
    # NOTE(review): pickle.load can execute arbitrary code while unpickling;
    # this presumably only ever reads a file produced by the project's own
    # indexing step -- confirm doc_path never points at untrusted data.
    with open(doc_path, "rb") as doc_file:
        return faiss_index, pickle.load(doc_file)
|
|
|
def get_embedding_model(model_name="sentence-transformers/multi-qa-MiniLM-L6-cos-v1"):
    """Instantiate the sentence-embedding model used to encode questions.

    Args:
        model_name: Model identifier handed to ``SentenceTransformer``. The
            default is the identifier this function used to hard-code, so
            calling it without arguments behaves as before.

    Returns:
        A ``SentenceTransformer`` instance for ``model_name``.
    """
    # NOTE(review): query embeddings are only comparable with an index built
    # from the same model -- presumably the stored index was built with the
    # default one; confirm before passing a different model_name.
    return SentenceTransformer(model_name)
|
|
|
def query_index(question, index, documents, model, k=3):
    """Return the documents whose embeddings are closest to *question*.

    The question is embedded with ``model.encode``, the embedding is searched
    in ``index``, and every returned position is looked up in ``documents``.

    Args:
        question: Query text to embed.
        index: Object exposing ``search(vectors, k)`` and returning a
            ``(distances, indices)`` pair (a FAISS index in this file).
        documents: Container indexed by the positions ``index`` returns.
        model: Object exposing ``encode(list_of_texts)``.
        k: Maximum number of neighbours to request.

    Returns:
        A list of at most ``k`` documents, in the order the index returned
        them. It is shorter than ``k`` when the index has fewer neighbours.
    """
    question_embedding = np.array(model.encode([question]), dtype="float32")
    _, indices = index.search(question_embedding, k)
    # FAISS pads the result with -1 when it finds fewer than k neighbours;
    # indexing documents with -1 would silently return the *last* document
    # as if it were a match, so those placeholders are dropped.
    return [documents[i] for i in indices[0] if i >= 0]
|
|
|
def nettoyer_context(context):
    """Clean retrieved context text before it is placed in the prompt.

    Two clean-ups are applied, in this order:

    1. ``['...']`` wrappers (the repr of a one-element list of strings) are
       replaced by their inner text.
    2. Literal ``None`` tokens are removed.

    Args:
        context: Text to clean.

    Returns:
        The cleaned text.
    """
    context = re.sub(r"\[\'(.*?)\'\]", r"\1", context)
    # A plain str.replace("None", "") also mangles ordinary words such as
    # "Nonetheless" -> "theless". The lookahead keeps "None" when a lowercase
    # letter follows, while standalone and back-to-back tokens ("NoneNone")
    # are still stripped.
    return re.sub(r"None(?![a-z])", "", context)
|
|
|
# Lazily created Llama instance, shared by every generate_answer() call.
_LLM = None


def _get_llm():
    """Return the shared Llama instance, creating it on first use."""
    global _LLM
    if _LLM is None:
        # hf_hub_download returns the local path of the GGUF file, which is
        # what Llama expects as model_path.
        model_file = hf_hub_download(
            repo_id="TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
            filename="mistral-7b-instruct-v0.1.Q4_K_M.gguf",
        )
        _LLM = Llama(
            model_path=model_file,
            n_ctx=2048,
            n_threads=6,
            verbose=False,
        )
    return _LLM


def generate_answer(question, context):
    """Generate an answer to *question* from *context* with the local LLM.

    The model file is resolved and loaded only once per process; building a
    new ``Llama`` object on every call would repeat the model load for each
    question.

    Args:
        question: The user's question, inserted after ``Question :``.
        context: Retrieved text inserted above the question.

    Returns:
        The text of the first completion choice, stripped of surrounding
        whitespace.
    """
    llm = _get_llm()

    # The "#à personnaliser" line is part of the prompt sent to the model; it
    # is a placeholder where project-specific instructions are meant to go.
    # NOTE(review): n_ctx is 2048 and max_tokens is 1024 -- a long context
    # presumably leaves little room for the completion; confirm the limits.
    prompt = f"""
#à personnaliser
{context}

Question : {question}
Réponse :
"""

    output = llm(prompt, max_tokens=1024, stop=["</s>"])
    return output["choices"][0]["text"].strip()
|
|