# rishabh5752's picture
# Update app.py
# a3c89e6 verified
import time
import streamlit as st
from llama_index import ServiceContext, StorageContext, set_global_service_context, VectorStoreIndex, Document
from llama_index.prompts import PromptTemplate
from llama_index.embeddings import LangchainEmbedding
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index.chat_engine.condense_question import CondenseQuestionChatEngine
from llama_index.llms import LlamaCPP
from llama_index.llms.llama_utils import messages_to_prompt, completion_to_prompt
from PyPDF2 import PdfReader
def modelspecific_prompt(promptmessage: str) -> str:
    """Wrap a user message in the ``Instruct: ... Output:`` prompt layout.

    Args:
        promptmessage: The raw text to place after ``Instruct:``.

    Returns:
        The message prefixed with ``"Instruct: "`` and followed by a newline
        and ``"Output:"``.
    """
    return f"Instruct: {promptmessage}\nOutput:"
def extract_text_from_pdf(pdf):
    """Extract the text of every page of a PDF and return it as lines.

    Args:
        pdf: The PDF source handed straight to ``PdfReader`` (in this app,
            the object returned by ``st.file_uploader``).

    Returns:
        A list of strings obtained by splitting the combined page text on
        newline characters. A PDF with no pages or no text yields ``['']``.
    """
    pdf_reader = PdfReader(pdf)
    # `or ""` is defensive: a page without a text layer must not break the
    # join below if the extractor hands back nothing.
    page_texts = [page.extract_text() or "" for page in pdf_reader.pages]
    # Pages are joined with a newline so the last line of one page is not
    # fused with the first line of the next one before splitting.
    return "\n".join(page_texts).split("\n")
@st.cache_resource
def _load_llm():
    """Create the LlamaCPP wrapper around the local Phi-2 GGUF file once.

    Streamlit re-executes the script on every widget interaction, so without
    caching the model would be re-created for every chat message.
    """
    return LlamaCPP(
        model_url=None,
        model_path='phi-2.Q4_K_M.gguf',
        temperature=0.1,
        max_new_tokens=512,
        context_window=2048,
        generate_kwargs={},
        # NOTE(review): these helpers come from llama_utils and presumably
        # emit Llama-2 style prompts, while the model file is Phi-2 -- confirm
        # this is the intended prompt format.
        messages_to_prompt=messages_to_prompt,
        completion_to_prompt=completion_to_prompt,
        verbose=True,
    )


@st.cache_resource
def _load_embed_model():
    """Create the HuggingFace embedding model once and reuse it across reruns."""
    # NOTE(review): "bge-small-en-v1.5" has no organisation prefix; presumably
    # it resolves to a local directory -- confirm.
    return LangchainEmbedding(
        HuggingFaceEmbeddings(model_name="bge-small-en-v1.5")
    )


def _build_chat_engine(pdf, service_context):
    """Index the uploaded PDF and return a chat engine over it.

    Returns ``None`` when the PDF yields no non-blank text lines.
    """
    # One Document per non-blank line; blank lines carry nothing to embed.
    documents = [
        Document(text=line)
        for line in extract_text_from_pdf(pdf)
        if line.strip()
    ]
    if not documents:
        return None

    # A fresh storage context per PDF keeps earlier uploads out of the index.
    storage_context = StorageContext.from_defaults()
    # from_documents parses the documents into nodes and stores them itself,
    # so no separate node-parsing pass is done beforehand. The LLM is taken
    # from service_context.
    index = VectorStoreIndex.from_documents(
        documents,
        service_context=service_context,
        storage_context=storage_context,
    )

    # The chat engine is given `chat_history` and `question` to fill into this
    # template; without those placeholders the user's question would never
    # reach the condensing step.
    custom_prompt = PromptTemplate(
        "Given the following conversation and a follow-up message, rewrite "
        "the follow-up message as a standalone question that keeps all "
        "relevant context.\n\n"
        "Chat history:\n{chat_history}\n\n"
        "Follow-up message: {question}\n\n"
        "Standalone question:"
    )
    return CondenseQuestionChatEngine.from_defaults(
        query_engine=index.as_query_engine(),
        condense_question_prompt=custom_prompt,
        verbose=True,
    )


def main():
    """Render the Streamlit RAG chat UI backed by a local Phi-2 model.

    Flow: load (cached) models, let the user upload a PDF, build a vector
    index and chat engine for that PDF once per upload, then run a chat loop
    whose transcript is kept in ``st.session_state.messages``.
    """
    service_context = ServiceContext.from_defaults(
        chunk_size=128,
        chunk_overlap=20,
        context_window=2048,
        num_output=768,
        llm=_load_llm(),
        embed_model=_load_embed_model(),
    )
    set_global_service_context(service_context)

    st.title("Llama-CPP Local LLM with RAG (Phi-2 RAG)")
    pdf = st.file_uploader("Upload a PDF file", type=["pdf"])
    if pdf is None:
        # Without a document there is no chat engine to answer with.
        st.info("Upload a PDF file to start chatting.")
        return

    # Rebuild the index only when a different file is uploaded. Name and size
    # are used as a lightweight identity for the upload. Keeping the engine in
    # session state also preserves its conversation memory across reruns.
    source_key = (pdf.name, pdf.size)
    if st.session_state.get("chat_engine_source") != source_key:
        engine = _build_chat_engine(pdf, service_context)
        if engine is None:
            st.warning("No extractable text was found in the uploaded PDF.")
            return
        st.session_state.chat_engine = engine
        st.session_state.chat_engine_source = source_key
    chat_engine = st.session_state.chat_engine

    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Replay the transcript so earlier turns stay visible after a rerun.
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    if prompt := st.chat_input("What is up?"):
        st.session_state.messages.append(
            {"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)

        with st.chat_message("assistant"):
            message_placeholder = st.empty()
            full_response = ""
            assistant_response = str(
                chat_engine.chat(modelspecific_prompt(str(prompt)))
            )
            # Reveal the answer word by word with a cursor glyph to imitate
            # streaming output.
            for chunk in assistant_response.split():
                full_response += chunk + " "
                time.sleep(0.05)
                message_placeholder.markdown(full_response + "▌")
            message_placeholder.markdown(full_response)
        st.session_state.messages.append(
            {"role": "assistant", "content": full_response})
# Entry point: build the UI only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()