Update app.py
app.py CHANGED
@@ -10,7 +10,6 @@ from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
 from dotenv import load_dotenv
 from htmlTemplates import css
 
-
 # Set Streamlit page configuration
 st.set_page_config(page_title="Chat with Notes and AI", page_icon=":books:", layout="wide")
 
@@ -27,7 +26,7 @@ def load_pipeline():
 
     # Load model with offload folder for disk storage of weights
     model = AutoModelForCausalLM.from_pretrained(
-        model_name,
+        model_name,
         torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,  # Use bfloat16 on GPU, float32 on CPU
         device_map="auto",  # Automatically map model to available devices (e.g., GPU if available)
         trust_remote_code=True,
@@ -36,11 +35,11 @@ def load_pipeline():
 
     # Return text-generation pipeline
     return pipeline(
-        task="text-generation",
-        model=model,
-        tokenizer=tokenizer,
+        task="text-generation",
+        model=model,
+        tokenizer=tokenizer,
         torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
-        device_map="auto",
+        device_map="auto",
         return_full_text=True
     )
 
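For orientation, here is a sketch of how load_pipeline reads once these fixes land. The tokenizer line is assumed from the unchanged part of app.py, and model_name is left as a parameter since its value sits outside the diff; this is a reading aid, not the authoritative file contents.

import torch
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM

def load_pipeline(model_name):
    # Tokenizer; in app.py this is created just above the diffed lines
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

    # Load model; device_map="auto" places layers on GPU/CPU (and disk offload) as needed
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
        device_map="auto",
        trust_remote_code=True,
    )

    # Wrap in a text-generation pipeline; the model is already placed on its
    # devices above, so no device argument is passed here
    return pipeline(
        task="text-generation",
        model=model,
        tokenizer=tokenizer,
        torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
        return_full_text=True,
    )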
@@ -80,8 +79,8 @@ def get_chunks(raw_text):
     from langchain.text_splitter import CharacterTextSplitter
     text_splitter = CharacterTextSplitter(
         separator="\n",
-        chunk_size=
-        chunk_overlap=
+        chunk_size=1000,  # Reduced chunk size for faster processing
+        chunk_overlap=200,  # Smaller overlap for efficiency
         length_function=len
     )
     chunks = text_splitter.split_text(raw_text)
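What the new splitter settings mean in practice: chunks are capped at 1000 characters, and consecutive chunks share 200 characters, so text spanning a boundary still appears whole in at least one chunk. A standalone sketch; the sample text is made up, only the parameters come from this commit:

from langchain.text_splitter import CharacterTextSplitter

splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1000,    # hard cap on characters per chunk
    chunk_overlap=200,  # characters repeated between neighbouring chunks
    length_function=len,
)

# Hypothetical document text, ~250 characters per paragraph
raw_text = "\n".join(f"Paragraph {i}: " + "lorem ipsum " * 20 for i in range(40))
chunks = splitter.split_text(raw_text)
print(len(chunks), max(len(c) for c in chunks))  # chunk count, largest chunk size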
@@ -99,11 +98,18 @@ def get_vectorstore(chunks):
 # Generating response from user queries
 def handle_question(question, vectorstore=None):
     if vectorstore:
-
+        # Reduce the number of retrieved chunks for faster processing
+        documents = vectorstore.similarity_search(question, k=2)
         context = "\n".join([doc.page_content for doc in documents])
+
+        # Limit context to 1000 characters to speed up model inference
+        context = context[:1000]
+
         if context:
             result_with_context = llm_context_chain.invoke({"instruction": question, "context": context})
             return result_with_context
+
+    # Fallback to instruction-only chain if no context is found
     return llm_chain.invoke({"instruction": question})
 
 def main():
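The retrieval path can be exercised in isolation. A minimal sketch, assuming a FAISS store with HuggingFaceEmbeddings stands in for whatever get_vectorstore builds in app.py (the texts and query here are invented):

from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

texts = ["Notes on chunking strategy.", "Notes on retrieval depth.", "Unrelated trivia."]
store = FAISS.from_texts(texts, HuggingFaceEmbeddings())

# k=2 mirrors the commit: only the two nearest chunks are fetched
docs = store.similarity_search("How deep should retrieval go?", k=2)

# Same 1000-character cap the commit applies before prompting the model
context = "\n".join(d.page_content for d in docs)[:1000]
print(context)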
|