# Source: Hugging Face Space "luanpoppe/vella-backend" (status: Running).
# File size: 9,861 bytes.

import os
from typing import Any, List, Dict, Literal, Tuple, Optional, Union, cast

from pydantic import SecretStr
from _utils.langchain_utils.LLM_class import LLM
from _utils.langchain_utils.Vector_store_class import VectorStore
from gerar_documento.serializer import (
    GerarDocumentoComPDFProprioSerializerData,
    GerarDocumentoSerializerData,
)
from setup.easy_imports import (
    Chroma,
    ChatOpenAI,
    PromptTemplate,
    BM25Okapi,
    Response,
    HuggingFaceEmbeddings,
)
import logging
from _utils.gerar_relatorio_modelo_usuario.DocumentSummarizer_simples import (
    DocumentSummarizer,
)
from _utils.models.gerar_relatorio import (
    RetrievalConfig,
)
from cohere import Client
from _utils.langchain_utils.Splitter_class import Splitter
import time


def reciprocal_rank_fusion(result_lists, weights=None):
    """Fuse several scored result lists into a single ranking.

    Despite the name, positions (ranks) are not used: the fused score of a
    document is the weighted sum of the scores it received in each list.

    Args:
        result_lists: Sequence of lists of ``(doc_id, score)`` pairs.
        weights: Optional per-list multipliers, indexed like ``result_lists``.
            When omitted, every list gets a weight of ``1.0``.

    Returns:
        A list of ``(doc_id, fused_score)`` tuples ordered by fused score,
        highest first. Ties keep the order in which documents were first seen.
    """
    list_count = len(result_lists)
    if weights is None:
        weights = [1.0] * list_count

    totals = {}
    for position, scored_docs in enumerate(result_lists):
        for doc_id, score in scored_docs:
            # The weight is looked up per entry, so an empty list never
            # touches its weight slot.
            totals[doc_id] = totals.get(doc_id, 0) + weights[position] * score

    # Stable sort, best score first.
    ranking = list(totals.items())
    ranking.sort(key=lambda pair: pair[1], reverse=True)
    return ranking


class GerarDocumento:
    openai_api_key = os.environ.get("OPENAI_API_KEY", "")
    cohere_api_key = os.environ.get("COHERE_API_KEY", "")
    resumo_gerado = ""

    def __init__(
        self,
        serializer: Union[
            GerarDocumentoSerializerData, GerarDocumentoComPDFProprioSerializerData, Any
        ],
    ):
        """Wire up retrieval, generation and reranking from a request serializer.

        Args:
            serializer: Validated request data. The attributes read here are
                ``num_chunks_retrieval``, ``embedding_weight``, ``bm25_weight``,
                ``context_window``, ``chunk_overlap``, ``model``,
                ``gpt_temperature``, ``prompt_gerar_documento``,
                ``hf_embedding``, ``num_k_rerank``, ``model_cohere_rerank``
                and ``chunk_size``.
        """
        # Retrieval knobs are grouped in a single config object.
        retrieval_settings = {
            "num_chunks": serializer.num_chunks_retrieval,
            "embedding_weight": serializer.embedding_weight,
            "bm25_weight": serializer.bm25_weight,
            "context_window": serializer.context_window,
            "chunk_overlap": serializer.chunk_overlap,
        }
        self.config = RetrievalConfig(**retrieval_settings)
        self.logger = logging.getLogger(__name__)

        # Generation settings used when building the chat model and prompt.
        self.gpt_model = serializer.model
        self.gpt_temperature = serializer.gpt_temperature
        self.prompt_gerar_documento = serializer.prompt_gerar_documento

        # Copies the class-level key (read from the environment) onto the
        # instance.
        self.openai_api_key = self.openai_api_key
        self.cohere_client = Client(self.cohere_api_key)

        # The same embedding model name feeds the embeddings object and the
        # vector store wrapper.
        embedding_model_name = serializer.hf_embedding
        self.embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)

        # Reranking and chunking parameters.
        self.num_k_rerank = serializer.num_k_rerank
        self.model_cohere_rerank = serializer.model_cohere_rerank
        self.splitter = Splitter(serializer.chunk_size, serializer.chunk_overlap)

        self.vector_store = VectorStore(embedding_model_name)

    def retrieve_with_rank_fusion(
        self, vector_store: Chroma, bm25: BM25Okapi, chunk_ids: List[str], query: str
    ) -> List[Tuple[str, float]]:
        """Combine embedding and BM25 retrieval results into one ranked list.

        Args:
            vector_store: Store queried with ``similarity_search_with_score``.
            bm25: BM25 index whose ``get_scores`` output is positionally
                aligned with ``chunk_ids``.
            chunk_ids: Chunk identifiers, one per BM25-scored chunk.
            query: Free-text query; it is whitespace-tokenized for BM25.

        Returns:
            ``(chunk_id, fused_score)`` tuples sorted by fused score,
            highest first.

        Raises:
            Exception: Any error raised during retrieval is logged and
                re-raised unchanged.
        """
        try:
            # Get embedding results
            embedding_results = vector_store.similarity_search_with_score(
                query, k=self.config.num_chunks
            )

            # Convert embedding results to (chunk_id, score) pairs.
            # NOTE(review): 1 / (1 + score) assumes the store returns a
            # non-negative distance (lower is better) - confirm.
            embedding_list = [
                (doc.metadata["chunk_id"], 1 / (1 + score))
                for doc, score in embedding_results
            ]

            # Get BM25 results
            tokenized_query = query.split()
            bm25_scores = bm25.get_scores(tokenized_query)

            # Convert BM25 scores to (chunk_id, score) pairs.
            bm25_list = [
                (chunk_ids[i], float(score)) for i, score in enumerate(bm25_scores)
            ]

            # Keep only the top N BM25 candidates, best first.
            bm25_list = sorted(bm25_list, key=lambda x: x[1], reverse=True)[
                : self.config.num_chunks
            ]

            # Normalize BM25 scores by the best one. `default` keeps max()
            # from raising when there are no BM25 candidates, and the `or 1`
            # fallback avoids dividing by zero when every score is 0.
            top_bm25 = max((score for _, score in bm25_list), default=0.0)
            max_bm25 = top_bm25 or 1
            bm25_list = [(doc_id, score / max_bm25) for doc_id, score in bm25_list]

            # Pass the lists to rank fusion
            result_lists = [embedding_list, bm25_list]
            weights = [self.config.embedding_weight, self.config.bm25_weight]

            return reciprocal_rank_fusion(result_lists, weights=weights)

        except Exception as e:
            self.logger.error(f"Error in rank fusion retrieval: {str(e)}")
            raise

    def rank_fusion_get_top_results(
        self,
        vector_store: Chroma,
        bm25: BM25Okapi,
        chunk_ids: List[str],
        query: str = "Summarize the main points of this document",
    ):
        """Fetch text and metadata for the best chunks of the fused ranking.

        Args:
            vector_store: Store used for retrieval and for loading each chunk.
            bm25: BM25 index passed on to the fused retrieval.
            chunk_ids: Chunk identifiers passed on to the fused retrieval.
            query: Query used to rank the chunks.

        Returns:
            A ``(sources, contexts)`` tuple. ``contexts`` holds the chunk
            texts; ``sources`` holds one dict per chunk with the text, page,
            chunk id, relevance score and stored context. Chunks for which
            the store returns no document are skipped.
        """
        fused_ranking = self.retrieve_with_rank_fusion(
            vector_store, bm25, chunk_ids, query
        )
        limit = self.config.num_chunks

        sources = []
        contexts = []

        for chunk_id, relevance in fused_ranking[:limit]:
            # Load the full chunk behind each ranked id.
            fetched = vector_store.get(
                where={"chunk_id": chunk_id}, include=["documents", "metadatas"]
            )
            if not fetched["documents"]:
                continue

            chunk_text = fetched["documents"][0]
            chunk_meta = fetched["metadatas"][0]

            contexts.append(chunk_text)
            source_entry = {
                "content": chunk_text,
                "page": chunk_meta["page"],
                "chunk_id": chunk_id,
                "relevance_score": relevance,
                "context": chunk_meta.get("context", ""),
            }
            sources.append(source_entry)

        return sources, contexts

    def select_model_for_last_requests(
        self,
        llm_ultimas_requests: Literal[
            "gpt-4o-mini", "deepseek-chat", "gemini-2.0-flash"
        ],
    ):
        llm_instance = LLM()
        if llm_ultimas_requests == "gpt-4o-mini":
            llm = ChatOpenAI(
                temperature=self.gpt_temperature,
                model=self.gpt_model,
                api_key=SecretStr(self.openai_api_key),
            )
        elif llm_ultimas_requests == "deepseek-chat":
            llm = llm_instance.deepseek()
        elif llm_ultimas_requests == "gemini-2.0-flash":
            llm = llm_instance.google_gemini("gemini-2.0-flash")
        return llm

    async def gerar_documento_final(
        self,
        vector_store: Chroma,
        bm25: BM25Okapi,
        chunk_ids: List[str],
        llm_ultimas_requests: str,
        query: str = "Summarize the main points of this document",
    ) -> List[Dict]:
        """Generate the final document and attach a source to each paragraph.

        The top-ranked chunks are joined into the prompt context. The chosen
        model is asked up to five times until it returns non-empty content.
        If every attempt comes back empty, one last request is made through
        the ``"gpt-4o-mini"`` option.

        Args:
            vector_store: Store used to retrieve and load chunks.
            bm25: BM25 index used for the fused retrieval.
            chunk_ids: Chunk identifiers aligned with the BM25 index.
            llm_ultimas_requests: Name of the model option to use.
            query: Query used to rank the chunks.

        Returns:
            One dict per generated paragraph with ``content`` and a
            ``source`` dict (page, 200-character text preview, context,
            relevance score, chunk id). Paragraph ``i`` is paired with
            source ``i``; paragraphs beyond the last source reuse it.

        Raises:
            ValueError: If no chunk could be retrieved. Previously this
                failed with an ``IndexError`` only after the model calls.
            Exception: If every attempt, including the fallback, returns
                empty content. All errors are logged and re-raised.
        """
        # Imported here so the block does not depend on file-level imports.
        import asyncio

        max_tentativas = 5

        try:
            sources, contexts = self.rank_fusion_get_top_results(
                vector_store, bm25, chunk_ids, query
            )

            # Without sources no paragraph can be attributed, so stop before
            # spending any model request.
            if not sources:
                raise ValueError(
                    "Nenhum trecho do documento foi recuperado para gerar o documento final."
                )

            prompt_gerar_documento = PromptTemplate(
                template=cast(str, self.prompt_gerar_documento),
                input_variables=["context"],
            )
            # The prompt is the same for every attempt, so format it once.
            prompt_text = prompt_gerar_documento.format(
                context="\n\n".join(contexts),
            )

            documento_gerado = ""
            tentativas = 0

            while tentativas < max_tentativas and not documento_gerado:
                tentativas += 1
                llm = self.select_model_for_last_requests(llm_ultimas_requests)  # type: ignore
                # NOTE(review): invoke() is a synchronous call inside a
                # coroutine; consider an async variant if the model objects
                # expose one - confirm.
                resposta = llm.invoke(prompt_text)
                if hasattr(resposta, "content") and resposta.content.strip():  # type: ignore
                    documento_gerado = resposta.content.strip()  # type: ignore
                else:
                    print(f"Tentativa {tentativas}: resposta vazia ou inexistente.")
                    # Yield to the event loop while waiting, and skip the
                    # wait once no retries are left.
                    if tentativas < max_tentativas:
                        await asyncio.sleep(5)

            if not documento_gerado:
                llm = self.select_model_for_last_requests("gpt-4o-mini")
                resposta = llm.invoke(prompt_text)
                documento_gerado = resposta.content.strip()  # type: ignore
                if not documento_gerado:
                    raise Exception(
                        "Falha ao tentar gerar o documento final por 5 tentativas e também ao tentar na última tentativa com o chat-gpt 4o mini."
                    )

            # Split the response into paragraphs
            summaries = [p.strip() for p in documento_gerado.split("\n\n") if p.strip()]

            # Create structured output
            last_source_idx = len(sources) - 1
            structured_output = []
            for idx, summary in enumerate(summaries):
                source = sources[min(idx, last_source_idx)]
                structured_output.append(
                    {
                        "content": summary,
                        "source": {
                            "page": source["page"],
                            "text": source["content"][:200] + "...",
                            "context": source["context"],
                            "relevance_score": source["relevance_score"],
                            "chunk_id": source["chunk_id"],
                        },
                    }
                )

            return structured_output

        except Exception as e:
            self.logger.error(f"Error generating enhanced summary: {str(e)}")
            raise

    async def validar_conteudo_documento_final(self):
        documento_gerado = ""
        tentativas = 0