rajsecrets0 committed
Commit 0901470 · verified · 1 Parent(s): 7de17f7

Create app.py

Files changed (1):
  app.py +82 -0
app.py ADDED
@@ -0,0 +1,82 @@
import streamlit as st
import torch
from transformers import BitsAndBytesConfig

# Import llama-index and langchain modules
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings, PromptTemplate
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from langchain.embeddings import HuggingFaceEmbeddings
from llama_index.embeddings.langchain import LangchainEmbedding

# ---------------------------
# Configure your LLM and embeddings
# ---------------------------
system_prompt = """
You are a Q&A assistant. Your goal is to answer questions as
accurately as possible based on the instructions and context provided.
"""
query_wrapper_prompt = PromptTemplate("<|USER|>{query_str}<|ASSISTANT|>")

# Configure BitsAndBytes for 4-bit quantization (the compute dtype only applies to 4-bit loading)
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16
)

# Initialize the HuggingFaceLLM with your model settings
llm = HuggingFaceLLM(
    context_window=4096,
    max_new_tokens=256,
    generate_kwargs={"temperature": 0.0, "do_sample": False},
    system_prompt=system_prompt,
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer_name="meta-llama/Llama-2-7b-chat-hf",
    model_name="meta-llama/Llama-2-7b-chat-hf",
    device_map="auto",
    model_kwargs={
        "torch_dtype": torch.float16,
        "quantization_config": quantization_config
    }
)

# Set up the embedding model using Langchain's HuggingFaceEmbeddings
lc_embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
embed_model = LangchainEmbedding(lc_embed_model)

# Apply global settings for llama-index
Settings.llm = llm
Settings.embed_model = embed_model
Settings.chunk_size = 1024

# ---------------------------
# Load documents from repository
# ---------------------------
# The "data" folder should be part of your repository with your documents.
DATA_DIR = "data"  # Ensure this folder exists and contains your documents.
try:
    documents = SimpleDirectoryReader(DATA_DIR).load_data()
except Exception as e:
    st.error(f"Error loading documents from '{DATA_DIR}': {e}")
    documents = []

if not documents:
    st.warning("No documents found in the data folder. Please add your documents and redeploy.")
else:
    # Create the vector store index
    index = VectorStoreIndex.from_documents(documents)
    query_engine = index.as_query_engine()

    # ---------------------------
    # Streamlit Interface
    # ---------------------------
    st.title("LlamaIndex Q&A Assistant")

    user_query = st.text_input("Enter your question:")

    if user_query:
        with st.spinner("Querying..."):
            response = query_engine.query(user_query)
        st.markdown("### Response:")
        st.write(str(response))  # str() extracts the answer text from the Response object
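
One caveat worth flagging: Streamlit re-runs the whole script on every interaction, so as written the 7B model is re-loaded and the index is re-built on each query. A minimal sketch of how the heavy setup could be cached with st.cache_resource (assuming the same data folder and the Settings configured above; build_query_engine is a hypothetical helper name, not part of the committed file):

import streamlit as st
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader

# Hypothetical helper: cache the expensive setup so reruns reuse one instance.
# st.cache_resource keeps a single shared object per process across reruns.
@st.cache_resource
def build_query_engine(data_dir: str = "data"):
    # Assumes Settings.llm and Settings.embed_model were configured as in app.py.
    documents = SimpleDirectoryReader(data_dir).load_data()
    index = VectorStoreIndex.from_documents(documents)
    return index.as_query_engine()

query_engine = build_query_engine()

With something like this in place, only the first run pays the model-load and indexing cost. The app is launched with streamlit run app.py; it likely needs streamlit, torch, transformers, bitsandbytes, accelerate, sentence-transformers, and the llama-index / langchain integration packages installed, plus a Hugging Face token with access to the gated meta-llama/Llama-2-7b-chat-hf weights.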