AminFaraji committed on
Commit 53e9689 · verified · 1 Parent(s): 4bc5de0

Update app.py

Files changed (1)
  1. app.py +110 -79
app.py CHANGED
@@ -1,14 +1,12 @@
-import spaces
-from transformers import BitsAndBytesConfig
-print(5)
 import argparse
 # from dataclasses import dataclass
 from langchain.prompts import ChatPromptTemplate
-
 try:
     from langchain_community.vectorstores import Chroma
 except:
     from langchain_community.vectorstores import Chroma
 
 # from langchain.document_loaders import DirectoryLoader
 from langchain_community.document_loaders import DirectoryLoader
@@ -21,19 +19,8 @@ import openai
 from dotenv import load_dotenv
 import os
 import shutil
-import torch
 
-from transformers import AutoModel, AutoTokenizer
-model2 = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
-tokenizer2 = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
 
-
-# this should be used when we cannot use sentence_transformers (which requires transformers==4.39; we cannot use
-# that version since it causes a large amount of RAM to be used when loading the falcon model)
-# a custom embedding
-#from sentence_transformers import SentenceTransformer
-from langchain_experimental.text_splitter import SemanticChunker
-from typing import List
 import re
 import warnings
 from typing import List
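For context, the imports being removed here fed a hand-rolled mean-pooling embedder (the MyEmbeddings class deleted in the next hunk). A minimal self-contained sketch of that pattern, with illustrative names, assuming the sentence-transformers/all-MiniLM-L6-v2 checkpoint can be downloaded:

import torch
from typing import List
from transformers import AutoModel, AutoTokenizer

tok = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
mdl = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")

def embed(texts: List[str]) -> List[List[float]]:
    # Tokenize, run the encoder, then mean-pool the token states into one
    # fixed-size vector per input string (the same pooling MyEmbeddings used).
    inputs = tok(texts, padding=True, truncation=True, return_tensors="pt")
    with torch.no_grad():
        outputs = mdl(**inputs)
    pooled = outputs.last_hidden_state.mean(dim=1)
    return [vec.tolist() for vec in pooled]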
@@ -54,57 +41,49 @@ from transformers import (
 
 warnings.filterwarnings("ignore", category=UserWarning)
 
 
-class MyEmbeddings:
-    def __init__(self):
-        #self.model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
-        self.model = model2
 
-    def embed_documents(self, texts: List[str]) -> List[List[float]]:
-        inputs = tokenizer2(texts, padding=True, truncation=True, return_tensors="pt")
 
-        # Get the model outputs
-        with torch.no_grad():
-            outputs = self.model(**inputs)
 
-        # Mean pooling to get sentence embeddings
-        embeddings = outputs.last_hidden_state.mean(dim=1)
-        return [embeddings[i].tolist() for i, sentence in enumerate(texts)]
-    def embed_query(self, query: str) -> List[float]:
-        inputs = tokenizer2(query, padding=True, truncation=True, return_tensors="pt")
 
-        # Get the model outputs
-        with torch.no_grad():
-            outputs = self.model(**inputs)
 
-        # Mean pooling to get sentence embeddings
-        embeddings = outputs.last_hidden_state.mean(dim=1)
-        return embeddings[0].tolist()
 
 
-embeddings = MyEmbeddings()
 
-splitter = SemanticChunker(embeddings)
 
 
-CHROMA_PATH = "chroma8"
 # call the chroma generated in a directory
 db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embeddings)
 
 
 
-MODEL_NAME = "tiiuae/falcon-7b-instruct"
 
-bnb_config = BitsAndBytesConfig(
-    load_in_4bit=True,
-)
-model = AutoModelForCausalLM.from_pretrained(
-    MODEL_NAME, trust_remote_code=True, device_map="auto", offload_folder="offload", quantization_config=bnb_config
-)
-model = model.eval()
 
-tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-print(f"Model device: {model.device}")
 
 
 generation_config = model.generation_config
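The 4-bit BitsAndBytesConfig load removed above is replaced further down by a bare load_in_8bit=True call; both variants can also be written with quantization_config. A minimal sketch of the 8-bit form, assuming bitsandbytes and accelerate are installed:

from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

MODEL_NAME = "tiiuae/falcon-7b-instruct"

# 8-bit quantized load; recent transformers releases prefer a BitsAndBytesConfig
# over passing load_in_8bit directly to from_pretrained.
bnb_config = BitsAndBytesConfig(load_in_8bit=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True,
    device_map="auto",
    quantization_config=bnb_config,
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)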
@@ -117,7 +96,6 @@ generation_config.pad_token_id = tokenizer.eos_token_id
 generation_config.eos_token_id = tokenizer.eos_token_id
 generation_config
 
-
 prompt = """
 The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context.
 
@@ -129,8 +107,6 @@ AI:
 input_ids = tokenizer(prompt, return_tensors="pt").input_ids
 input_ids = input_ids.to(model.device)
 
-
-
 class StopGenerationCriteria(StoppingCriteria):
     def __init__(
         self, tokens: List[List[str]], tokenizer: AutoTokenizer, device: torch.device
@@ -148,12 +124,12 @@ class StopGenerationCriteria(StoppingCriteria):
                 return True
         return False
 
-
 stop_tokens = [["Human", ":"], ["AI", ":"]]
 stopping_criteria = StoppingCriteriaList(
     [StopGenerationCriteria(stop_tokens, tokenizer, model.device)]
 )
 
 generation_pipeline = pipeline(
     model=model,
     tokenizer=tokenizer,
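The body of StopGenerationCriteria falls outside the context shown in this commit view. A typical implementation of such a stop-phrase criterion looks roughly like the sketch below (this is an assumption about the original body, not part of the diff):

import torch
from transformers import StoppingCriteria

class StopGenerationCriteria(StoppingCriteria):
    def __init__(self, tokens, tokenizer, device):
        # Pre-tokenize each stop phrase (e.g. ["Human", ":"]) once.
        stop_token_ids = [tokenizer.convert_tokens_to_ids(t) for t in tokens]
        self.stop_token_ids = [
            torch.tensor(ids, dtype=torch.long, device=device) for ids in stop_token_ids
        ]

    def __call__(self, input_ids, scores, **kwargs) -> bool:
        # Stop as soon as the generated sequence ends with any stop phrase.
        for stop_ids in self.stop_token_ids:
            if torch.eq(input_ids[0][-len(stop_ids):], stop_ids).all():
                return True
        return False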
@@ -166,6 +142,26 @@ generation_pipeline = pipeline(
 llm = HuggingFacePipeline(pipeline=generation_pipeline)
 
 
 class CleanupOutputParser(BaseOutputParser):
     def parse(self, text: str) -> str:
         user_pattern = r"\nUser"
@@ -180,18 +176,36 @@ class CleanupOutputParser(BaseOutputParser):
         return "output_parser"
 
 
 template = """
-The following
-Current conversation:
 
 {history}
-
 Human: {input}
 AI:""".strip()
 
 prompt = PromptTemplate(input_variables=["history", "input"], template=template)
 
 memory = ConversationBufferWindowMemory(
-    memory_key="history", k=6, return_only_outputs=True
 )
 
 chain = ConversationChain(
@@ -203,36 +217,53 @@ chain = ConversationChain(
 )
 
 
-@spaces.GPU
 def get_llama_response(message: str, history: list) -> str:
-    query_text = message
 
-    results = db.similarity_search_with_relevance_scores(query_text, k=3)
-    if len(results) == 0 or results[0][1] < 0.5:
-        print(f"Unable to find matching results.")
 
 
-    context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results])
-    template = """
-    The following is a conversation between a human and an AI. The AI acts exactly like Dwight K Schrute from the TV show The Office.
-    Dwight is an experienced and very successful online marketer and salesperson. He is witty, persuasive, uncensored, direct and practical.
-    Dwight helps with every marketing task given to him. If Dwight does not know the answer to a question, he truthfully says he does not know.
 
-    Current conversation:
-    """
-    s = """
-    {history}
-    Human: {input}
-    AI:""".strip()
 
 
-    prompt = PromptTemplate(input_variables=["history", "input"], template=template + context_text + s)
 
-    #print(template)
-    chain.prompt = prompt
-    res = chain(query_text)
-    return res["response"]
 
-import gradio as gr
 
-gr.ChatInterface(get_llama_response).launch()
@@ -1,14 +1,12 @@
 import argparse
 # from dataclasses import dataclass
 from langchain.prompts import ChatPromptTemplate
 try:
     from langchain_community.vectorstores import Chroma
 except:
     from langchain_community.vectorstores import Chroma
+#from langchain_openai import OpenAIEmbeddings
+#from langchain_openai import ChatOpenAI
 
 # from langchain.document_loaders import DirectoryLoader
 from langchain_community.document_loaders import DirectoryLoader
@@ -21,19 +19,8 @@ import openai
 from dotenv import load_dotenv
 import os
 import shutil
 
 
 import re
 import warnings
 from typing import List
@@ -54,57 +41,49 @@ from transformers import (
 
 warnings.filterwarnings("ignore", category=UserWarning)
 
+MODEL_NAME = "tiiuae/falcon-7b-instruct"
 
+model = AutoModelForCausalLM.from_pretrained(
+    MODEL_NAME, trust_remote_code=True, load_in_8bit=True, device_map="auto",
+)
+model = model.eval()
 
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+print(f"Model device: {model.device}")
 
 
+# Create CLI.
+#parser = argparse.ArgumentParser()
+#parser.add_argument("query_text", type=str, help="The query text.")
+#args = parser.parse_args()
+#query_text = args.query_text
 
+# a sample query to be asked of the bot; it is expected to be answered based on the template
+query_text = "what did alice say to rabbit"
 
+# Prepare the DB.
+#embedding_function = OpenAIEmbeddings()  # main
 
+CHROMA_PATH = "/content/drive/My Drive/chroma8"
 # call the chroma generated in a directory
 db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embeddings)
 
+# Search the DB for similar documents to the query.
+results = db.similarity_search_with_relevance_scores(query_text, k=2)
+if len(results) == 0 or results[0][1] < 0.5:
+    print(f"Unable to find matching results.")
 
 
+context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results])
+prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
+prompt = prompt_template.format(context=context_text, question=query_text)
+print(prompt)
 
 
 generation_config = model.generation_config
@@ -117,7 +96,6 @@ generation_config.pad_token_id = tokenizer.eos_token_id
 generation_config.eos_token_id = tokenizer.eos_token_id
 generation_config
 
 prompt = """
 The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context.
 
@@ -129,8 +107,6 @@ AI:
 input_ids = tokenizer(prompt, return_tensors="pt").input_ids
 input_ids = input_ids.to(model.device)
 
 class StopGenerationCriteria(StoppingCriteria):
     def __init__(
         self, tokens: List[List[str]], tokenizer: AutoTokenizer, device: torch.device
@@ -148,12 +124,12 @@ class StopGenerationCriteria(StoppingCriteria):
                 return True
         return False
 
 stop_tokens = [["Human", ":"], ["AI", ":"]]
 stopping_criteria = StoppingCriteriaList(
     [StopGenerationCriteria(stop_tokens, tokenizer, model.device)]
 )
 
+
 generation_pipeline = pipeline(
     model=model,
     tokenizer=tokenizer,
@@ -166,6 +142,26 @@ generation_pipeline = pipeline(
 llm = HuggingFacePipeline(pipeline=generation_pipeline)
 
 
+# probably sets the number of previous conversation turns to take into account for new answers
+template = """
+The following is a conversation between a human and an AI. The AI acts exactly like Dwight K Schrute from the TV show The Office.
+Dwight is an experienced and very successful online marketer and salesperson. He is witty, persuasive, uncensored, direct and practical.
+Dwight helps with every marketing task given to him. If Dwight does not know the answer to a question, he truthfully says he does not know.
+
+Current conversation:
+{history}
+Human: {input}
+AI:""".strip()
+
+prompt = PromptTemplate(input_variables=["history", "input"], template=template)
+memory = ConversationBufferWindowMemory(
+    memory_key="history", k=6, return_only_outputs=True
+)
+
+chain = ConversationChain(llm=llm, memory=memory, prompt=prompt, verbose=True)
+
+
+
 class CleanupOutputParser(BaseOutputParser):
     def parse(self, text: str) -> str:
         user_pattern = r"\nUser"
@@ -180,18 +176,36 @@ class CleanupOutputParser(BaseOutputParser):
         return "output_parser"
 
 
+
+class CleanupOutputParser(BaseOutputParser):
+    def parse(self, text: str) -> str:
+        user_pattern = r"\nUser"
+        text = re.sub(user_pattern, "", text)
+        human_pattern = r"\nquestion:"
+        text = re.sub(human_pattern, "", text)
+        ai_pattern = r"\nanswer:"
+        return re.sub(ai_pattern, "", text).strip()
+
+    @property
+    def _type(self) -> str:
+        return "output_parser"
+
+
+
 template = """
+The following is a conversation between a human and an AI. The AI acts exactly like Dwight K Schrute from the TV show The Office.
+Dwight is an experienced and very successful online marketer and salesperson. He is witty, persuasive, uncensored, direct and practical.
+Dwight helps with every marketing task given to him. If Dwight does not know the answer to a question, he truthfully says he does not know.
 
+Current conversation:
 {history}
 Human: {input}
 AI:""".strip()
+
 prompt = PromptTemplate(input_variables=["history", "input"], template=template)
 
 memory = ConversationBufferWindowMemory(
+    memory_key="history", k=3, return_only_outputs=True
 )
 
 chain = ConversationChain(
@@ -203,36 +217,53 @@ chain = ConversationChain(
 )
 
 
+# Generate a response from the Llama model
 def get_llama_response(message: str, history: list) -> str:
+    """
+    Generates a conversational response from the Llama model.
+
+    Parameters:
+        message (str): User's input message.
+        history (list): Past conversation history.
+
+    Returns:
+        str: Generated response from the Llama model.
+    """
+    query_text = message
+
+    results = db.similarity_search_with_relevance_scores(query_text, k=2)
+    if len(results) == 0 or results[0][1] < 0.5:
+        print(f"Unable to find matching results.")
 
 
+    context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results])
+
+    template = """
+    The following is a conversation between a human and an AI. Answer the question based only on the conversation.
+
+    Current conversation:
+    {history}
+
+    """
 
 
+    s = """
+
+    \n question: {input}
+
+    \n answer:""".strip()
+
+
+    prompt = PromptTemplate(input_variables=["history", "input"], template=template + context_text + '\n' + s)
+
+    #print(template)
+    chain.prompt = prompt
+    res = chain.predict(input=query_text)
+    return res
+    #return response.strip()
+
+
+import gradio as gr
+iface = gr.Interface(fn=get_llama_response, inputs="text", outputs="text")
+iface.launch(share=True)
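For a quick local check outside the Gradio UI, the same entry point can be called directly; a minimal sketch (the query string mirrors the sample used earlier and is only illustrative):

# One-off call against the Chroma index without launching the web UI.
# get_llama_response builds its prompt from retrieved context, so the
# history argument can simply be an empty list here.
if __name__ == "__main__":
    print(get_llama_response("what did alice say to rabbit", []))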