CogwiseAI committed
Commit 081c79a · 1 Parent(s): 2db7511

Update app.py

Files changed (1):
  app.py +6 -114
app.py CHANGED
@@ -22,7 +22,7 @@ from transformers import (
     AutoTokenizer,
     BitsAndBytesConfig,
 )
-
+import pickle
 
 USER_ICON = "images/user-icon.png"
 AI_ICON = "images/ai-icon.png"
@@ -104,123 +104,15 @@ def handle_input():
     if len(chat_history) == MAX_HISTORY_LENGTH:
         chat_history = chat_history[:-1]
 
-    # api_url = "https://9pl792yjf9.execute-api.us-east-1.amazonaws.com/beta/chatcogwise"
-    # api_request_data = {"question": input, "session": user_id}
-    # api_response = requests.post(api_url, json=api_request_data)
-    # result = api_response.json()
-
-    # answer = result['answer']
-    # !pip install -Uqqq pip --progress-bar off
-    # !pip install -qqq bitsandbytes==0.39.0
-    # !pip install -qqq torch==2.0.1 --progress-bar off
-    # !pip install -qqq -U git+https://github.com/huggingface/transformers.git@e03a9cc --progress-bar off
-    # !pip install -qqq -U git+https://github.com/huggingface/peft.git@42a184f --progress-bar off
-    # !pip install -qqq -U git+https://github.com/huggingface/accelerate.git@c9fbb71 --progress-bar off
-    # !pip install -qqq datasets==2.12.0 --progress-bar off
-    # !pip install -qqq loralib==0.1.1 --progress-bar off
-    # !pip install einops
-
-    import os
-    # from pprint import pprint
-    # import json
-
-    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
-
-    # notebook_login()
-    # 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
-
-    # """### Load dataset"""
-
-    from datasets import load_dataset
-
-    dataset_name = "nisaar/Lawyer_GPT_India"
-    # dataset_name = "patrick11434/TEST_LLM_DATASET"
-    dataset = load_dataset(dataset_name, split="train")
-
-    # """## Load adapters from the Hub
-
-    # You can also directly load adapters from the Hub using the commands below:
-    # """
-
-    # change peft_model_id
-    bnb_config = BitsAndBytesConfig(
-        load_in_4bit=True,
-        bnb_4bit_use_double_quant=True,
-        bnb_4bit_quant_type="nf4",
-        bnb_4bit_compute_dtype=torch.bfloat16,
-    )
-
-    peft_model_id = "nisaar/falcon7b-Indian_Law_150Prompts"
-    config = PeftConfig.from_pretrained(peft_model_id)
-    model = AutoModelForCausalLM.from_pretrained(
-        config.base_model_name_or_path,
-        return_dict=True,
-        quantization_config=bnb_config,
-        device_map="auto",
-        trust_remote_code=True,
-    )
-    tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
-    tokenizer.pad_token = tokenizer.eos_token
-
-    model = PeftModel.from_pretrained(model, peft_model_id)
-
-    """## Inference
-
-    You can then directly use the trained model, or the model you have loaded from the 🤗 Hub, for inference as you usually would in `transformers`.
-    """
-
-    generation_config = model.generation_config
-    generation_config.max_new_tokens = 200
-    generation_config.temperature = 1
-    generation_config.top_p = 0.7
-    generation_config.num_return_sequences = 1
-    generation_config.pad_token_id = tokenizer.eos_token_id
-    generation_config.eos_token_id = tokenizer.eos_token_id
-
-    DEVICE = "cuda:0"
-
-    # Commented out IPython magic to ensure Python compatibility.
-    # %%time
-    # prompt = f"""
-    # <human>: Who appoints the Chief Justice of India?
-    # <assistant>:
-    # """.strip()
-    #
-    # encoding = tokenizer(prompt, return_tensors="pt").to(DEVICE)
-    # with torch.inference_mode():
-    #     outputs = model.generate(
-    #         input_ids=encoding.input_ids,
-    #         attention_mask=encoding.attention_mask,
-    #         generation_config=generation_config,
-    #     )
-    # print(tokenizer.decode(outputs[0], skip_special_tokens=True))
+    with open('model_saved.pkl', 'rb') as f:
+        model = pickle.load(f)
 
     def generate_response(question: str) -> str:
-        prompt = f"""
-        <human>: {question}
-        <assistant>:
-        """.strip()
-        encoding = tokenizer(prompt, return_tensors="pt").to(DEVICE)
-        with torch.inference_mode():
-            outputs = model.generate(
-                input_ids=encoding.input_ids,
-                attention_mask=encoding.attention_mask,
-                generation_config=generation_config,
-            )
-        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-        assistant_start = '<assistant>:'
-        response_start = response.find(assistant_start)
-        return response[response_start + len(assistant_start):].strip()
+        return model.predict([question])[0]
 
-    # prompt = "Debate the merits and demerits of introducing simultaneous elections in India?"
-    prompt=input
-    answer=generate_response(prompt)
-    print(answer)
+    prompt = input
+    answer = generate_response(prompt)
 
-    # answer='Yes'
     chat_history.append((input, answer))
 
     st.session_state.answers.append({
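
Note on the new code path: after this commit, generate_response() assumes that model_saved.pkl unpickles to an object exposing a scikit-learn-style predict() that maps a question string directly to an answer string. The commit does not include the script that produced that pickle; below is a minimal sketch of one way such a file could be created. The pipeline, training pairs, and filename are illustrative assumptions, not part of this repository.

# Sketch (assumption): build a model whose predict([question])[0] returns an answer string,
# then pickle it to model_saved.pkl so app.py can load it at request time.
import pickle

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline

# Hypothetical toy Q/A pairs; the real training data is not shown in this commit.
questions = [
    "Who appoints the Chief Justice of India?",
    "What is Article 21 about?",
]
answers = [
    "The President of India appoints the Chief Justice of India.",
    "Article 21 guarantees the protection of life and personal liberty.",
]

# Nearest-neighbour retrieval: vectorise the question and return the answer
# attached to the closest training question, so predict() yields a string.
pipeline = make_pipeline(TfidfVectorizer(), KNeighborsClassifier(n_neighbors=1))
pipeline.fit(questions, answers)

with open("model_saved.pkl", "wb") as f:
    pickle.dump(pipeline, f)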