Spaces:
				
			
			
	
			
			
		Sleeping
		
	
	
	
			
			
	
	
	
	
		
		
		Sleeping
		
	File size: 4,604 Bytes
			
			| 2dcc710 fca97ef 2dcc710 fca97ef 2dcc710 fca97ef 2dcc710 fca97ef 2dcc710 fca97ef aaec9ae fca97ef aaec9ae fca97ef aaec9ae fca97ef aaec9ae fca97ef 2dcc710 053ffc5 fca97ef | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 | import logging
import json
from contextlib import asynccontextmanager
from typing import Any, List, Tuple
import random
from fastapi import FastAPI
from pydantic import BaseModel
from FlagEmbedding import BGEM3FlagModel, FlagReranker
from starlette.requests import Request
import torch
# Seed the global RNG so the canned phrases picked with random.sample follow
# the same sequence on every process start.
random.seed(42)
# Install the default root handler, then let this module's logger emit INFO.
logging.basicConfig()
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
def get_data(model):
    """Load the English FAQ entries and embed their questions.

    Reads ``data/paris-2024-faq.json``, keeps the entries whose ``lang`` is
    ``'en'`` and encodes every ``label`` with the embedding model.

    Args:
        model: Indexable container whose first element is the embedding
            model; only ``model[0].encode`` is called.

    Returns:
        A 3-tuple ``(colbert_vecs, questions, answers)``: the
        ``'colbert_vecs'`` entry of the encoder output, the ``label`` of each
        kept entry, and the ``body`` of each kept entry (same order).

    Raises:
        OSError: If the FAQ file cannot be opened.
        KeyError: If an entry lacks ``lang``, ``label`` or ``body``.
    """
    # JSON files are UTF-8; without an explicit encoding the platform default
    # is used, which can garble or reject non-ASCII text.
    with open("data/paris-2024-faq.json", encoding="utf-8") as f:
        entries = json.load(f)
    english = [it for it in entries if it['lang'] == 'en']
    questions = [it['label'] for it in english]
    q_embeddings = model[0].encode(
        questions,
        return_dense=False,
        return_sparse=False,
        return_colbert_vecs=True,
    )
    return q_embeddings['colbert_vecs'], questions, [it['body'] for it in english]
# Request body accepted by the POST /answer/ endpoint.
class InputLoad(BaseModel):
    # The user's question as free text.
    question: str
# Response body returned by the POST /answer/ endpoint.
class ResponseLoad(BaseModel):
    # Final text produced by get_final_answer().
    answer: str
# Bundle of everything loaded at startup; load_models() stores it as app.ml.
class ML(BaseModel):
    # Embedding model holder; code in this file reaches the model as retriever[0].
    retriever: Any
    # Reranker; code in this file calls ranker.compute_score().
    ranker: Any
    # (ColBERT vectors, questions, answers) in the order returned by get_data().
    data: Tuple[List[Any], List[str], List[str]]
def load_models(app: FastAPI) -> FastAPI:
    """Instantiate the retriever and reranker and attach them to *app*.

    Builds an ``ML`` bundle (models plus the embedded FAQ data from
    ``get_data``) and stores it on ``app.ml``.

    Args:
        app: The FastAPI application to decorate.

    Returns:
        The same *app* instance, now carrying the ``ml`` attribute.
    """
    logger.info("Loading embedding model...")
    # The retriever is a one-element tuple: get_data() and get_candidates()
    # both access the model through index [0].
    embedder = (BGEM3FlagModel('BAAI/bge-m3', use_fp16=True),)
    logger.info("Loading ranker model...")
    reranker = FlagReranker('BAAI/bge-reranker-v2-m3', use_fp16=True)
    logger.info("Done loading models!")
    app.ml = ML(
        retriever=embedder,
        ranker=reranker,
        data=get_data(embedder),
    )
    logger.info("Done with startup steps!")
    return app
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load the models onto *app* before yielding control.

    Nothing runs after the ``yield``, so no cleanup happens on shutdown.
    """
    # load_models mutates and returns the same app, so the result is not needed.
    load_models(app=app)
    yield
# Application instance; the lifespan handler runs load_models() on it.
app = FastAPI(lifespan=lifespan)
@app.get("/health")
def health_check():
    # Liveness probe: always reports a static payload, touching no models.
    status = {"server": "running"}
    return status
@app.post("/answer/")
async def receive(input_load: InputLoad, request: Request) -> ResponseLoad:
    # Pipeline: retrieve top candidates -> rerank them -> phrase the answer.
    # NOTE(review): the model calls below are synchronous inside an async
    # handler — confirm this is acceptable for the expected load.
    models: ML = request.app.ml
    query = input_load.question
    indices, retriever_scores = get_candidates(query, models)
    # The reranker score is returned but only the retriever score drives the
    # wording chosen by get_final_answer.
    best_answer, _rank_score, best_retriever_score = rerank_candidates(
        query, indices, retriever_scores, models
    )
    return ResponseLoad(answer=get_final_answer(best_answer, best_retriever_score))
def get_candidates(question, ml, topk=5):
    """Return the FAQ entries whose questions best match *question*.

    The question is encoded to ColBERT vectors and scored against every
    precomputed FAQ embedding in ``ml.data[0]``.

    Args:
        question: The user's question text.
        ml: Object exposing ``retriever`` (model at index 0) and ``data``
            (FAQ embeddings at index 0).
        topk: Maximum number of candidates to return. Fewer are returned
            when the FAQ holds fewer entries.

    Returns:
        ``(indices, scores)``: two equally long lists, best match first.
        ``indices`` point into the FAQ lists in ``ml.data``.
    """
    encoder = ml.retriever[0]
    encoded = encoder.encode(
        [question],
        return_dense=False,
        return_sparse=False,
        return_colbert_vecs=True,
    )
    question_emb = encoded['colbert_vecs'][0]
    scores = [encoder.colbert_score(question_emb, faq_emb) for faq_emb in ml.data[0]]
    scores_tensor = torch.stack(scores)
    # torch.topk raises if k exceeds the number of elements, so clamp it to
    # the number of FAQ entries actually scored.
    k = min(topk, len(scores))
    top_values, top_indices = torch.topk(scores_tensor, k)
    return top_indices.tolist(), top_values.tolist()
def rerank_candidates(question, indices, values, ml):
    """Pick the best candidate answer according to the reranker.

    Args:
        question: The user's question text.
        indices: Candidate positions into the answers list ``ml.data[2]``.
        values: Retriever scores aligned with *indices*.
        ml: Object exposing ``ranker.compute_score`` and ``data``.

    Returns:
        ``(answer, rank_score, retriever_score)`` for the candidate with the
        highest reranker score; ties go to the earliest candidate.

    Raises:
        ValueError: If *indices* is empty.
    """
    candidate_answers = [ml.data[2][_ind] for _ind in indices]
    raw_scores = ml.ranker.compute_score([[question, it] for it in candidate_answers])
    # The reranker may hand back a bare scalar instead of a list when given a
    # single pair; normalise so the selection below works either way.
    try:
        scores = list(raw_scores)
    except TypeError:
        scores = [raw_scores]
    # max() keeps the first of equal maxima, matching list.index semantics.
    rank_ind = max(range(len(scores)), key=scores.__getitem__)
    return candidate_answers[rank_ind], scores[rank_ind], values[rank_ind]
def get_final_answer(answer, retriever_score):
    """Wrap *answer* in wording that reflects how confident the retriever was.

    Args:
        answer: The selected FAQ answer text.
        retriever_score: Retriever score of the selected candidate.

    Returns:
        A "not found" message when the score is below 0.65, the answer with a
        cautious intro when it is below 0.8, otherwise the answer framed by a
        positive intro and a closing line.
    """
    logger.info(f"Retriever score: {retriever_score}")

    if retriever_score < 0.65:
        # Too weak a match: discard the answer entirely.
        apology = random.sample(NOT_FOUND_ANSWERS, k=1)
        return apology[0]

    if retriever_score < 0.8:
        # Plausible match: present it, but flag the uncertainty.
        caveat = random.sample(ROUGH_MATCH_INTROS, k=1)[0]
        return f"{caveat}\n{answer}"

    # Strong match: intro is drawn before the closing line.
    opening = random.sample(GOOD_MATCH_INTROS, k=1)[0]
    closing = random.sample(GOOD_MATCH_ENDS, k=1)[0]
    return f"{opening}\n{answer}\n{closing}"
# Replies used by get_final_answer when the retriever score is too low to
# trust any FAQ entry; one is picked at random per request.
NOT_FOUND_ANSWERS = [
    "I'm sorry, but I couldn't find any information related to your question in my knowledge base.",
    "Apologies, but I don't have the information you're looking for at the moment.",
    "I’m sorry, I couldn’t locate any relevant details in my current data.",
    "Unfortunately, I wasn't able to find an answer to your query. Can I help with something else?",
    "I'm afraid I don't have the information you need right now. Please feel free to ask another question.",
    "Sorry, I couldn't find anything that matches your question in my knowledge base.",
    "I apologize, but I wasn't able to retrieve information related to your query.",
    "I'm sorry, but it looks like I don't have an answer for that. Is there anything else I can assist with?",
    "Regrettably, I couldn't find the information you requested. Can I help you with anything else?",
    "I’m sorry, but I don't have the details you're seeking in my knowledge database."
]
# Phrases get_final_answer places around a retrieved answer: an intro and a
# closing line for strong matches, and a cautious intro for rough matches.
GOOD_MATCH_INTROS = ["Super!"]
GOOD_MATCH_ENDS = ["Hope this helps!"]
ROUGH_MATCH_INTROS = ["Not sure if that answers your question!"]
 |