AminFaraji committed on
Commit
8cc21a9
·
verified ·
1 Parent(s): 2e90434

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -19
app.py CHANGED
@@ -20,16 +20,6 @@ from dotenv import load_dotenv
20
  import os
21
  import shutil
22
  import torch
23
-
24
- from transformers import AutoModel,AutoTokenizer
25
- model2 = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
26
- tokenizer2 = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
27
-
28
-
29
- # this should be used when we cannot use sentence_transformers (which requires transformers==4.39; we cannot use
30
- # this version since it causes a large amount of RAM to be used when loading the falcon model)
31
- # a custom embedding
32
- #from sentence_transformers import SentenceTransformer
33
  from langchain_experimental.text_splitter import SemanticChunker
34
  from typing import List
35
  import re
@@ -50,6 +40,27 @@ from transformers import (
50
  pipeline,
51
  )
52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  warnings.filterwarnings("ignore", category=UserWarning)
54
 
55
 
@@ -91,15 +102,7 @@ db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embeddings)
91
 
92
 
93
 
94
- MODEL_NAME = "tiiuae/falcon-7b-instruct"
95
 
96
- model = AutoModelForCausalLM.from_pretrained(
97
- MODEL_NAME, trust_remote_code=True, device_map="auto",offload_folder="offload"
98
- )
99
- model = model.eval()
100
-
101
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
102
- print(f"Model device: {model.device}")
103
 
104
 
105
  generation_config = model.generation_config
@@ -224,7 +227,7 @@ def get_llama_response(message: str, history: list) -> str:
224
 
225
  #print(template)
226
  chain.prompt=prompt
227
- res = chain.invoke(query_text)
228
  return(res["response"])
229
 
230
  import gradio as gr
 
20
  import os
21
  import shutil
22
  import torch
 
 
 
 
 
 
 
 
 
 
23
  from langchain_experimental.text_splitter import SemanticChunker
24
  from typing import List
25
  import re
 
40
  pipeline,
41
  )
42
 
43
+
44
+ MODEL_NAME = "tiiuae/falcon-7b-instruct"
45
+
46
+ model = AutoModelForCausalLM.from_pretrained(
47
+ MODEL_NAME, trust_remote_code=True, device_map="auto",offload_folder="offload"
48
+ )
49
+ model = model.eval()
50
+
51
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
52
+ print(f"Model device: {model.device}")
53
+
54
+ from transformers import AutoModel,AutoTokenizer
55
+ model2 = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
56
+ tokenizer2 = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
57
+
58
+
59
+ # this should be used when we cannot use sentence_transformers (which requires transformers==4.39; we cannot use
60
+ # this version since it causes a large amount of RAM to be used when loading the falcon model)
61
+ # a custom embedding
62
+ #from sentence_transformers import SentenceTransformer
63
+
64
  warnings.filterwarnings("ignore", category=UserWarning)
65
 
66
 
 
102
 
103
 
104
 
 
105
 
 
 
 
 
 
 
 
106
 
107
 
108
  generation_config = model.generation_config
 
227
 
228
  #print(template)
229
  chain.prompt=prompt
230
+ res = chain(query_text)
231
  return(res["response"])
232
 
233
  import gradio as gr