Clement Vachet committed · Commit 26a21fc
1 Parent(s): ca60bef
Add api token argument
app.py CHANGED
@@ -10,6 +10,7 @@ from langchain.chains import ConversationChain
 from langchain.memory import ConversationBufferMemory
 from langchain_huggingface import HuggingFaceEndpoint
 
+
 from pathlib import Path
 import chromadb
 from unidecode import unidecode
@@ -21,6 +22,12 @@ import tqdm
 import accelerate
 import re
 
+from dotenv import load_dotenv
+
+
+# Load environment file - HuggingFace API key
+_ = load_dotenv()
+huggingfacehub_api_token = os.environ.get("HUGGINGFACE_API_KEY")
 
 # default_persist_directory = './chroma_HF/'
 # list_llm = ["mistralai/Mistral-7B-Instruct-v0.2", "mistralai/Mixtral-8x7B-Instruct-v0.1", "mistralai/Mistral-7B-Instruct-v0.1", \
@@ -90,6 +97,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
     # max_new_tokens = max_tokens,
     # top_k = top_k,
     # load_in_8bit = True,
+    # huggingfacehub_api_token=huggingfacehub_api_token,
     # )
     # elif llm_model in ["HuggingFaceH4/zephyr-7b-gemma-v0.1","mosaicml/mpt-7b-instruct"]:
     # raise gr.Error("LLM model is too large to be loaded automatically on free inference endpoint")
@@ -98,6 +106,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
     # temperature = temperature,
     # max_new_tokens = max_tokens,
     # top_k = top_k,
+    # huggingfacehub_api_token=huggingfacehub_api_token,
     # )
     # elif llm_model == "microsoft/phi-2":
     # # raise gr.Error("phi-2 model requires 'trust_remote_code=True', currently not supported by langchain HuggingFaceHub...")
@@ -109,6 +118,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
     # top_k = top_k,
     # trust_remote_code = True,
     # torch_dtype = "auto",
+    # huggingfacehub_api_token=huggingfacehub_api_token,
     # )
     # elif llm_model == "TinyLlama/TinyLlama-1.1B-Chat-v1.0":
     # llm = HuggingFaceEndpoint(
@@ -117,6 +127,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
     # temperature = temperature,
     # max_new_tokens = 250,
     # top_k = top_k,
+    # huggingfacehub_api_token=huggingfacehub_api_token,
     # )
     # elif llm_model == "meta-llama/Llama-2-7b-chat-hf":
     # raise gr.Error("Llama-2-7b-chat-hf model requires a Pro subscription...")
@@ -126,6 +137,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
     # temperature = temperature,
     # max_new_tokens = max_tokens,
     # top_k = top_k,
+    # huggingfacehub_api_token=huggingfacehub_api_token,
     # )
     # else:
     # llm = HuggingFaceEndpoint(
@@ -135,8 +147,8 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
     # temperature = temperature,
     # max_new_tokens = max_tokens,
     # top_k = top_k,
+    # huggingfacehub_api_token=huggingfacehub_api_token,
     # )
-
     llm = HuggingFaceEndpoint(
         repo_id=llm_model,
         # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k, "trust_remote_code": True, "torch_dtype": "auto"}
@@ -144,6 +156,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
         temperature = temperature,
         max_new_tokens = max_tokens,
         top_k = top_k,
+        huggingfacehub_api_token=huggingfacehub_api_token,
     )
 
     progress(0.75, desc="Defining buffer memory...")
@@ -166,6 +179,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
         verbose=False,
     )
     progress(0.9, desc="Done!")
+
     return qa_chain
 
 
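The hunks above load the Hugging Face API key from a local environment file and pass it explicitly to HuggingFaceEndpoint as huggingfacehub_api_token, instead of relying on an implicit login. A minimal standalone sketch of the same pattern, assuming a .env file that defines HUGGINGFACE_API_KEY plus the python-dotenv and langchain-huggingface packages; the model id and generation settings below are placeholders, not values taken from this commit:

import os

from dotenv import load_dotenv
from langchain_huggingface import HuggingFaceEndpoint

# Read HUGGINGFACE_API_KEY from a local .env file into os.environ
_ = load_dotenv()
huggingfacehub_api_token = os.environ.get("HUGGINGFACE_API_KEY")

# Pass the token explicitly to the inference endpoint wrapper
llm = HuggingFaceEndpoint(
    repo_id="mistralai/Mistral-7B-Instruct-v0.2",  # placeholder model id
    temperature=0.7,                               # placeholder generation settings
    max_new_tokens=512,
    top_k=3,
    huggingfacehub_api_token=huggingfacehub_api_token,
)

On a Space, the same key would typically be stored as a repository secret, which is exposed to the app as an environment variable, so os.environ.get("HUGGINGFACE_API_KEY") picks it up without a .env file.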
@@ -236,7 +250,7 @@ def conversation(qa_chain, message, history):
     #print("formatted_chat_history",formatted_chat_history)
 
     # Generate response using QA chain
-    response = qa_chain({"question": message, "chat_history": formatted_chat_history})
+    response = qa_chain.invoke({"question": message, "chat_history": formatted_chat_history})
     response_answer = response["answer"]
     if response_answer.find("Helpful Answer:") != -1:
         response_answer = response_answer.split("Helpful Answer:")[-1]
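The last hunk also replaces the direct call qa_chain({...}) with qa_chain.invoke({...}); calling a chain object like a function was deprecated in LangChain 0.1.x in favor of the Runnable invoke() method, and the input and output dictionaries stay the same. A rough sketch of the call site, assuming qa_chain is the retrieval chain built in initialize_llmchain and formatted_chat_history is a list of prior (user, assistant) turns:

# message: the new user question; formatted_chat_history: prior turns as pairs
response = qa_chain.invoke(
    {"question": message, "chat_history": formatted_chat_history}
)
response_answer = response["answer"]

# The model sometimes prefixes its output; keep only the text after "Helpful Answer:"
if "Helpful Answer:" in response_answer:
    response_answer = response_answer.split("Helpful Answer:")[-1]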