# LangChain QA Panel App

In [1]:
#!pip install langchain openai chromadb tiktoken pypdf panel

In [2]:
import os 
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI
from langchain.document_loaders import TextLoader
from langchain.document_loaders import PyPDFLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
import panel as pn
import tempfile



In [3]:
# Needed as panel serve was not finding my utils.py
# => Copy paste its content here
# LLM
# Ollama for local tests
from langchain.llms import Ollama
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.llms import Ollama
# Ref.: https://mistral.ai/news/mixtral-of-experts/#instructed-models
# Q5_K_M quantization flavor for the recommended quality tradeoff (memory is no problem here)
# Ref.: https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF#provided-files
# Ollama model tags used for local tests.
MISTRAL = "mistral:7b-instruct-v0.2-q5_K_M"
# Q4_K quantization flavor for best memory/quality/recommended tradeoff
# Ref.: https://huggingface.co/TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF#provided-files
# mixtral:8x7b-instruct-v0.1-q4_K_M was sadly still too big for my Mac,
# hence the smaller q3_K_L flavor below.
MIXTRAL = "mixtral:8x7b-instruct-v0.1-q3_K_L"
# Llama2 13B
# Ref.: https://huggingface.co/TheBloke/Llama-2-13B-GGUF
LLAMA2 = "llama2:13b-chat-q5_K_M"


def _streaming_ollama(model_tag):
    """Return an Ollama LLM for *model_tag* wired to a stdout streaming handler.

    A fresh CallbackManager is created on every call so the models never
    share a manager instance.
    """
    stdout_callbacks = CallbackManager([StreamingStdOutCallbackHandler()])
    return Ollama(model=model_tag, callback_manager=stdout_callbacks)


# The `format` option is left at its default; see
# Ref.: https://api.python.langchain.com/en/latest/llms/langchain_community.llms.ollama.Ollama.html#langchain_community.llms.ollama.Ollama.format
# (format="json" was tried and is currently disabled)
mistral = _streaming_ollama(MISTRAL)
mixtral = _streaming_ollama(MIXTRAL)
llama2 = _streaming_ollama(LLAMA2)


# LOAD
from langchain_community.document_loaders import UnstructuredMarkdownLoader
from langchain_community.document_loaders import TextLoader
from langchain_community.document_loaders import PyPDFLoader

# Documents to index, grouped by file type ('md' / 'pdf'). Entries are file
# names only; commented-out names are alternative sources that are currently
# not indexed.
_MARKDOWN_FILES = [
    # "Présentation modes dégradés-20230120_112423-Enregistrement de la réunion.md",
    "YouTube - Mode secours telephonie.md",
]
_PDF_FILES = [
    # "SI-Samu_Fiche procédure_Mode dégradé_Perte de CRRA.pdf",
    # "[SI-Samu] Fiche mémo - Procédure Mode dégradé.pdf",
    "SI-Samu_Documentation_produit_SF4_J18HF2_20231219 - mode secours seul.pdf",
    # "SI-Samu_Documentation_produit_SF4_J18HF2_20231219.pdf"
]
FILES = {"md": _MARKDOWN_FILES, "pdf": _PDF_FILES}

def load_data(files, base_dir='resources'):
    """Load every listed file into LangChain documents, grouped by type.

    Args:
        files: Mapping with optional 'pdf' and 'md' keys, each holding a list
            of file names. A missing key (or ``None`` for the whole mapping)
            is treated as "no file of that type".
        base_dir: Directory the file names are resolved against. Defaults to
            'resources', the location used so far.

    Returns:
        A dict ``{'md': [...], 'pdf': [...]}`` with the documents returned by
        the loaders (PyPDFLoader for PDFs, TextLoader for markdown files).
    """
    files = files or {}
    data = {'md': [], 'pdf': []}
    for pdf in files.get('pdf', ()):
        data['pdf'].extend(PyPDFLoader(os.path.join(base_dir, pdf)).load())
    # Markdown files are read as plain text; header-aware splitting is left
    # to the caller.
    for md in files.get('md', ()):
        data['md'].extend(TextLoader(os.path.join(base_dir, md)).load())
    return data

def to_full_data(data):
    """Flatten the per-type documents into one new list, markdown first, then PDF."""
    merged = list(data['md'])
    merged.extend(data['pdf'])
    return merged

# SPLIT
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.text_splitter import MarkdownHeaderTextSplitter

def split_MD_then_recursive(data, chunk_size=500, chunk_overlap=50):
    """Split the loaded documents into small chunks.

    Markdown documents are first cut along their headers ("#", "##", "###"),
    then the resulting sections, together with the PDF documents, go through
    a RecursiveCharacterTextSplitter.

    Args:
        data: Dict with 'md' and 'pdf' lists of documents (as built by
            load_data).
        chunk_size: Maximum chunk size handed to the recursive splitter.
        chunk_overlap: Overlap between consecutive chunks, kept to improve
            result quality.

    Returns:
        The list of chunks produced by ``split_documents``.
    """
    markdown_splitter = MarkdownHeaderTextSplitter(
        headers_to_split_on=[
            ("#", "Titre 1"),
            ("##", "Titre 2"),
            ("###", "Titre 3"),
        ],
        strip_headers=False,
    )
    # PDF documents skip the header pass; copy the list so the caller's data
    # is not mutated.
    documents = list(data['pdf'])
    for md in data['md']:
        for section in markdown_splitter.split_text(md.page_content):
            # split_text() only receives the raw text, so the sections carry
            # the header metadata but not the loader metadata (e.g. 'source').
            # Copy it over without overriding the header keys.
            for key, value in md.metadata.items():
                section.metadata.setdefault(key, value)
            documents.append(section)

    # Char-level splits
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(documents)

# EMBED
# Directly done in the different scripts

# RETRIEVE
from langchain.storage import InMemoryStore
from langchain.retrievers import ParentDocumentRetriever, BM25Retriever, EnsembleRetriever
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

# Ensemble is based on weight fusion (Reciprocal Rank Fusion) | Ref.: https://safjan.com/implementing-rank-fusion-in-python/
def get_parent_ensemble_retriever(embeddings, full_data, all_splits, k=4, parent_chunk_size=2000, child_chunk_size=400, collection_name="store"):
    """Build an EnsembleRetriever mixing a parent-document retriever and BM25.

    Args:
        embeddings: Embedding function handed to the Chroma vector store.
        full_data: Unsplit documents indexed by the ParentDocumentRetriever.
        all_splits: Pre-split chunks indexed by the BM25 retriever.
        k: Number of results requested from each underlying retriever.
        parent_chunk_size: Chunk size of the parent splitter.
        child_chunk_size: Chunk size of the child splitter. Keep it smaller
            than the parent size (and no bigger than 512, as most embeddings
            are said to truncate after that).
        collection_name: Name of the Chroma collection for the child chunks.

    Returns:
        An EnsembleRetriever weighting both retrievers equally (0.5 / 0.5).
    """
    # -- Parent-document side: small chunks are embedded, bigger ones returned.
    dense_retriever = ParentDocumentRetriever(
        # Vector store indexing the child chunks
        vectorstore=Chroma(
            collection_name=collection_name,
            embedding_function=embeddings,
        ),
        # Storage layer for the parent documents
        docstore=InMemoryStore(),
        child_splitter=RecursiveCharacterTextSplitter(chunk_size=child_chunk_size),
        parent_splitter=RecursiveCharacterTextSplitter(chunk_size=parent_chunk_size),
        # "score_threshold" and search_type="mmr" were tried and left disabled.
        search_kwargs={"k": k},
    )
    dense_retriever.add_documents(full_data)

    # -- BM25 side: works on the pre-split chunks; each chunk is tagged with a
    # "retriever" metadata entry (the chunk's own metadata wins on conflict).
    chunk_texts = [chunk.page_content for chunk in all_splits]
    chunk_metadatas = [
        {"retriever": "BM25 sparse similiarity", **chunk.metadata}
        for chunk in all_splits
    ]
    sparse_retriever = BM25Retriever.from_texts(chunk_texts, metadatas=chunk_metadatas)
    sparse_retriever.k = k

    # -- Ensemble of the parent retriever and BM25
    return EnsembleRetriever(
        retrievers=[dense_retriever, sparse_retriever],
        weights=[0.5, 0.5],
    )

# PROMPT
# Add more context to query + update system prompt to make it speak French
# Ref.: https://stackoverflow.com/questions/76554411/unable-to-pass-prompt-template-to-retrievalqa-in-langchain
# Ref.: https://community.openai.com/t/how-to-prevent-chatgpt-from-answering-questions-that-are-outside-the-scope-of-the-provided-context-in-the-system-role-message/112027/7
from langchain import PromptTemplate
# Prompt text used for question answering. It frames the assistant's role,
# asks it to rely on the supplied context rather than prior knowledge, and to
# answer in French using "vous". The {context} and {question} placeholders are
# the two input variables declared on PROMPT below.
template = """
System: You are helping a user of "bandeau téléphonique SI-SAMU" (a CTI - Computer Telephony Integration - system) during system failure as he needs to use its local backup phone.
Context information is below. Given the context information and not prior knowledge, answer the query.
Language: Answer in French and using "vous".
---
Context: {context}
---
Question: {question}
---
Réponse :
"""
# Prompt template object built from the text above.
PROMPT = PromptTemplate(template=template, input_variables=['question', 'context'])

# RESULTS
class color:
    """ANSI escape sequences for styling terminal output; END resets the style."""

    PURPLE = "\x1b[95m"
    CYAN = "\x1b[96m"
    DARKCYAN = "\x1b[36m"
    BLUE = "\x1b[94m"
    GREEN = "\x1b[92m"
    YELLOW = "\x1b[93m"
    RED = "\x1b[91m"
    BOLD = "\x1b[1m"
    UNDERLINE = "\x1b[4m"
    END = "\x1b[0m"

def parse_answer(answer):
    """Print a QA result and the documents it was built from.

    Args:
        answer: Dict with the 'query' and 'result' strings and, optionally,
            a 'source_documents' list whose items expose ``metadata`` (dict)
            and ``page_content``.

    Each source is printed as "<source> > <titles> (page N) [retriever]"
    followed by its content. Documents carrying a 'retriever' metadata entry
    are labelled "BM25"; all others are labelled "vectorstore".
    """
    print(f">> {answer['query']}")
    print(f">> {answer['result']}")
    print(">> Sources :")
    for doc in answer.get('source_documents') or []:
        metadata = doc.metadata
        page = f" (page {metadata['page']})" if 'page' in metadata else ''
        source = metadata.get('source', '')
        # Append the markdown header trail, from the outermost title down.
        for title in ('Titre 1', 'Titre 2', 'Titre 3'):
            if title in metadata:
                source += f" > {metadata[title]}"
        retriever = "BM25" if 'retriever' in metadata else "vectorstore"
        print(f">>> {color.BOLD}{source}{page} [{retriever}]{color.END}: {doc.page_content}\n---")
    print("--------\n\n")

In [4]:
# Prefer the helpers from utils.py when it can be imported; otherwise the
# copies defined earlier in this file stay in effect.
try:
    from utils import (
        # LOAD
        FILES,
        load_data,
        to_full_data,
        # SPLIT
        split_MD_then_recursive,
        # RETRIEVE
        get_parent_ensemble_retriever,
        # PROMPT
        PROMPT,
    )
except Exception as err:
    # Best-effort import: keep going with the in-file copies, but report why
    # utils could not be used. Unlike a bare ``except``, this no longer
    # swallows KeyboardInterrupt / SystemExit.
    print(f"Couldn't load utils: {err!r}")

In [5]:
# Panel setup: enable the 'texteditor' extension, use the bootstrap template
# and stretch components to the available width.
pn.extension('texteditor', template="bootstrap", sizing_mode='stretch_width')

# Template overrides: width of the main area and header colour.
_TEMPLATE_OVERRIDES = {
    "main_max_width": "960px",
    "header_background": "#1D70B6",
}
pn.state.template.param.update(**_TEMPLATE_OVERRIDES)

<param.parameterized._ParametersRestorer at 0x12e15f7c0>

In [6]:
# Placeholder shown in the empty question editor: an invitation followed by
# sample questions.
_PLACEHOLDER_TEXT = (
    "Veuillez saisir votre question ici...\n\n"
    "Comment faire pour me connecter ?\n"
    "Comment savoir si j'ai des appels en attente ?\n"
    "Comment faire un transfert accompagné ?\n"
    "Mot de passe pour se connecter au téléphone ?"
)

# Question input and its send button.
prompt = pn.widgets.TextEditor(
    value="",
    placeholder=_PLACEHOLDER_TEXT,
    height=140,
    toolbar=False,
)
run_button = pn.widgets.Button(name="Envoyer")

# Advanced settings: number of retrieved elements and chat model.
select_k = pn.widgets.IntSlider(
    name="Nombre d'éléments pertinents à extraire",
    start=1,
    end=5,
    step=1,
    value=2,
)
select_model = pn.widgets.RadioButtonGroup(
    name='Modèle',
    options=['GPT 3.5', 'GPT 4'],
    value='GPT 4',
)

# Input column on the left, settings card on the right.
_question_column = pn.Column(prompt, run_button, margin=5)
_settings_card = pn.Card(
    "Modèle",
    pn.Column(select_model, select_k),
    title="Paramètres avancés",
    collapsible=False,
    margin=12,
)
widgets = pn.Row(_question_column, _settings_card, width=940)

In [7]:
def qa(query, model, k):
    """Answer *query* from the indexed documentation with a RetrievalQA chain.

    Args:
        query: The user's question.
        model: 'GPT 4' selects gpt-4-1106-preview; any other value selects
            gpt-3.5-turbo-1106.
        k: Number of elements each retriever is asked to return.

    Returns:
        The chain output dict; the code reads its 'result' entry and the
        chain is configured to also return 'source_documents'.
    """
    from langchain.chains import RetrievalQA
    # Ref.: https://python.langchain.com/docs/integrations/platforms/openai
    from langchain_openai import ChatOpenAI, OpenAIEmbeddings

    # LOAD / SPLIT / EMBED / RETRIEVE
    # Building the retriever reads the files, splits them and embeds every
    # chunk, so it is done once per value of `k` and kept in pn.state.cache
    # instead of being redone for every question. Documents changed on disk
    # are therefore only picked up after a restart.
    cache_key = f"qa-retriever-k{k}"
    retriever = pn.state.cache.get(cache_key)
    if retriever is None:
        data = load_data(FILES)
        retriever = get_parent_ensemble_retriever(
            OpenAIEmbeddings(),  # OpenAI ADA
            to_full_data(data),
            split_MD_then_recursive(data),
            k,
            800,
            200,
            # One collection per cached retriever, so retrievers built for
            # different `k` never add their chunks to the same collection.
            f"ada-store-k{k}",
        )
        pn.state.cache[cache_key] = retriever

    # LLM: only instantiate the model that was actually selected.
    model_name = "gpt-4-1106-preview" if model == 'GPT 4' else "gpt-3.5-turbo-1106"
    llm = ChatOpenAI(model_name=model_name, temperature=0)

    # QA Chain
    qachain = RetrievalQA.from_chain_type(
        llm=llm,
        retriever=retriever,
        return_source_documents=True,
        verbose=True,
        chain_type_kwargs={"prompt": PROMPT},
    )
    result = qachain.invoke({"query": query})
    print(result['result'])
    return result

In [8]:
convos = []  # every rendered exchange so far, as Panel objects


def qa_result(_):
    """Answer the current question, if any, and return the whole conversation.

    The argument (the value bound to the send button) is ignored. When the
    editor holds a value, the question is sent to ``qa`` and two rows are
    appended to ``convos``: the question, then the answer with an accordion
    listing the source passages (no accordion when there are no sources).
    """
    # NOTE(review): TextEditor.value may be HTML markup rather than plain
    # text -- confirm whether tags should be stripped before querying.
    question = prompt.value
    if question:
        answer = qa(query=question, model=select_model.value, k=select_k.value)
        sources = answer["source_documents"]

        user_row = pn.Row(
            pn.panel("\U0001F60A", width=10),
            question,
            width=900,
        )

        if len(sources) > 0:
            passages = '\n\n--------------------------------------------------------------------\n\n'.join(
                doc.page_content for doc in sources
            )
            sources_panel = pn.Accordion(
                (f"Éléments utilisés ({len(sources)})", pn.pane.Markdown(passages))
            )
        else:
            sources_panel = None

        bot_row = pn.Row(
            pn.panel("\U0001F916", width=10),
            pn.Column(answer["result"], sources_panel),
        )
        convos.extend([user_row, bot_row])

    return pn.Column(*convos, margin=15, width=875, min_height=400)


In [9]:
# Bind qa_result to the send button and wrap the bound function in a panel
# with the loading indicator enabled.
_bound_qa_result = pn.bind(qa_result, run_button)
qa_interactive = pn.panel(_bound_qa_result, loading_indicator=True)

In [10]:
# Scrollable box holding the conversation, below a short caption.
output = pn.WidgetBox(
    "*Votre échange s'affichera ici :*",
    qa_interactive,
    width=940,
    scroll=True,
)

In [11]:
# layout
# Introductory text displayed above the widgets (title, data sources and
# usage disclaimer).
_INTRO_MARKDOWN = """
    ## Support mode secours SI-SAMU

    Cet assistant a été entrainé sur 
    - la documentation produit du Bandeau SI-SAMU (partie "12. Fonctionnement en mode dégradé")
    - la [vidéo](https://www.youtube.com/watch?v=EbupwqYeKdc) d'explication du mode secours de la téléphonie SI-SAMU

    Les réponses de cet assistant sont automatiques et non validées par les équipes du SI-SAMU. 
    Veuillez également ne pas transmettre d'informations personnelles et/ou médicales à l'assistant.
    Vous pouvez l'utiliser pour vous guider et extraire les bouts de documentation adaptés mais un recours au support SI-SAMU est conseillé en cas de problème.
    """

# Page: intro text, then the input widgets, then the conversation box.
pn.Column(
    pn.pane.Markdown(_INTRO_MARKDOWN),
    widgets,
    output,
).servable()