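"""Q&A over a PDF via local retrieval-augmented generation.

Pipeline: extract text from the PDF with PyPDF2, chunk it with LangChain's
RecursiveCharacterTextSplitter, embed each chunk with sentence-transformers,
index the vectors in a local Qdrant collection, retrieve the top-3 chunks for
every question, and answer with a quantized Llama-2-7B-Chat model (GGUF, via
ctransformers) behind a Gradio interface.
"""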
import gradio as gr
from qdrant_client import models, QdrantClient
from sentence_transformers import SentenceTransformer
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from ctransformers import AutoModelForCausalLM

# loading the embedding model
encoder = SentenceTransformer("all-MiniLM-L6-v2")
print("embedding model loaded.............................")
print("####################################################")

# loading the LLM
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
print("loading the LLM......................................")

# Alternative backend via llama-cpp-python, kept for reference:
# from langchain.llms import LlamaCpp
# llm = LlamaCpp(
#     model_path="/home/devangpagare/llm/models/llama-2-7b-chat.Q3_K_S.gguf",
#     n_ctx=2048,
#     f16_kv=True,  # MUST be set to True, otherwise you will run into problems after a couple of calls
#     callback_manager=callback_manager,
#     verbose=True,
# )

llm = AutoModelForCausalLM.from_pretrained(
    "TheBloke/Llama-2-7B-Chat-GGUF",
    model_file="llama-2-7b-chat.Q3_K_S.gguf",
    model_type="llama",
    temperature=0.2,
    # max_new_tokens=1024,
    # stop=["\n"],
)
print("LLM loaded........................................")
print("################################################################")
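# A minimal sketch of an alternative, not used below: LangChain's CTransformers
# wrapper can expose the same GGUF model as a LangChain LLM, so it could plug
# into chains such as RetrievalQA. The config values mirror the settings above
# and are illustrative assumptions, not tuned values.
# from langchain.llms import CTransformers
# llm = CTransformers(
#     model="TheBloke/Llama-2-7B-Chat-GGUF",
#     model_file="llama-2-7b-chat.Q3_K_S.gguf",
#     model_type="llama",
#     config={"temperature": 0.2, "max_new_tokens": 1024},
# )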
def get_chunks(text):
    # split the extracted PDF text into overlapping chunks for embedding
    text_splitter = RecursiveCharacterTextSplitter(
        # separator = "\n",
        chunk_size=500,
        chunk_overlap=100,
        length_function=len,
    )
    chunks = text_splitter.split_text(text)
    return chunks


pdf_path = './100 Weird Facts About the Human Body.pdf'
reader = PdfReader(pdf_path)

text = ""
for page in reader.pages:
    text += page.extract_text()

chunks = get_chunks(text)
print(chunks)
print("Chunks are ready.....................................")
print("######################################################")

qdrant = QdrantClient(path="./db")
print("db created................................................")
print("#####################################################################")

qdrant.recreate_collection(
    collection_name="my_facts",
    vectors_config=models.VectorParams(
        size=encoder.get_sentence_embedding_dimension(),  # vector size is defined by the embedding model
        distance=models.Distance.COSINE,
    ),
)
print("Collection created........................................")
print("#########################################################")

# index every chunk by its position in the list
dic = dict(enumerate(chunks))

qdrant.upload_records(
    collection_name="my_facts",
    records=[
        models.Record(
            id=idx,
            vector=encoder.encode(dic[idx]).tolist(),
            # payload keys and values are kept as strings here; the first 5
            # characters of each chunk serve as the key for that chunk
            payload={dic[idx][:5]: dic[idx]},
        )
        for idx in dic.keys()
    ],
)
print("Records uploaded........................................")
print("###########################################################")


def chat(question):
    hits = qdrant.search(
        collection_name="my_facts",
        query_vector=encoder.encode(question).tolist(),
        limit=3,
    )
    # concatenate the retrieved chunks into a single context string
    context = "".join(list(hit.payload.values())[0] for hit in hits)

    system_prompt = """You are a helpful assistant, you will use the provided context to answer user questions.
Read the given context before answering questions and think step by step. If you cannot answer a user question based on
the provided context, inform the user. Do not use any other information for answering the user. Provide a detailed answer to the question."""

    B_INST, E_INST = "[INST]", "[/INST]"
    B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"
    SYSTEM_PROMPT = B_SYS + system_prompt + E_SYS
    instruction = f"""
    Context: {context}
    User: {question}"""

    prompt_template = B_INST + SYSTEM_PROMPT + instruction + E_INST
    result = llm(prompt_template)
    return result


gr.Interface(
    fn=chat,
    inputs=gr.Textbox(lines=10, placeholder="Enter your question here 👉"),
    outputs=gr.Textbox(lines=10, placeholder="Your answer will be here soon 🚀"),
    title="Q&A with PDF 👩🏻‍💻📓✍🏻💡",
    description="This app facilitates a conversation with the PDF available at https://www.delo.si/assets/media/other/20110728/100%20Weird%20Facts%20About%20the%20Human%20Body.pdf 💡",
    theme="soft",
    examples=["Hello", "what is the speed of human nerve impulses?"],
    # cache_examples=True,
).launch()
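# Quick smoke test without the UI (comment out the launch() call above first):
# print(chat("what is the speed of human nerve impulses?"))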