Clement Vachet committed
Commit 26a21fc · 1 Parent(s): ca60bef

Add api token argument

Files changed (1): app.py (+16, -2)
app.py CHANGED
@@ -10,6 +10,7 @@ from langchain.chains import ConversationChain
 from langchain.memory import ConversationBufferMemory
 from langchain_huggingface import HuggingFaceEndpoint
 
+
 from pathlib import Path
 import chromadb
 from unidecode import unidecode
@@ -21,6 +22,12 @@ import tqdm
 import accelerate
 import re
 
+from dotenv import load_dotenv
+
+
+# Load environment file - HuggingFace API key
+_ = load_dotenv()
+huggingfacehub_api_token = os.environ.get("HUGGINGFACE_API_KEY")
 
 # default_persist_directory = './chroma_HF/'
 # list_llm = ["mistralai/Mistral-7B-Instruct-v0.2", "mistralai/Mixtral-8x7B-Instruct-v0.1", "mistralai/Mistral-7B-Instruct-v0.1", \
@@ -90,6 +97,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
 # max_new_tokens = max_tokens,
 # top_k = top_k,
 # load_in_8bit = True,
+# huggingfacehub_api_token=huggingfacehub_api_token,
 # )
 # elif llm_model in ["HuggingFaceH4/zephyr-7b-gemma-v0.1","mosaicml/mpt-7b-instruct"]:
 # raise gr.Error("LLM model is too large to be loaded automatically on free inference endpoint")
@@ -98,6 +106,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
 # temperature = temperature,
 # max_new_tokens = max_tokens,
 # top_k = top_k,
+# huggingfacehub_api_token=huggingfacehub_api_token,
 # )
 # elif llm_model == "microsoft/phi-2":
 # # raise gr.Error("phi-2 model requires 'trust_remote_code=True', currently not supported by langchain HuggingFaceHub...")
@@ -109,6 +118,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
 # top_k = top_k,
 # trust_remote_code = True,
 # torch_dtype = "auto",
+# huggingfacehub_api_token=huggingfacehub_api_token,
 # )
 # elif llm_model == "TinyLlama/TinyLlama-1.1B-Chat-v1.0":
 # llm = HuggingFaceEndpoint(
@@ -117,6 +127,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
 # temperature = temperature,
 # max_new_tokens = 250,
 # top_k = top_k,
+# huggingfacehub_api_token=huggingfacehub_api_token,
 # )
 # elif llm_model == "meta-llama/Llama-2-7b-chat-hf":
 # raise gr.Error("Llama-2-7b-chat-hf model requires a Pro subscription...")
@@ -126,6 +137,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
 # temperature = temperature,
 # max_new_tokens = max_tokens,
 # top_k = top_k,
+# huggingfacehub_api_token=huggingfacehub_api_token,
 # )
 # else:
 # llm = HuggingFaceEndpoint(
@@ -135,8 +147,8 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
 # temperature = temperature,
 # max_new_tokens = max_tokens,
 # top_k = top_k,
+# huggingfacehub_api_token=huggingfacehub_api_token,
 # )
-
 llm = HuggingFaceEndpoint(
 repo_id=llm_model,
 # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k, "trust_remote_code": True, "torch_dtype": "auto"}
@@ -144,6 +156,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
 temperature = temperature,
 max_new_tokens = max_tokens,
 top_k = top_k,
+huggingfacehub_api_token=huggingfacehub_api_token,
 )
 
 progress(0.75, desc="Defining buffer memory...")
@@ -166,6 +179,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
 verbose=False,
 )
 progress(0.9, desc="Done!")
+
 return qa_chain
 
 
@@ -236,7 +250,7 @@ def conversation(qa_chain, message, history):
 #print("formatted_chat_history",formatted_chat_history)
 
 # Generate response using QA chain
-response = qa_chain({"question": message, "chat_history": formatted_chat_history})
+response = qa_chain.invoke({"question": message, "chat_history": formatted_chat_history})
 response_answer = response["answer"]
 if response_answer.find("Helpful Answer:") != -1:
 response_answer = response_answer.split("Helpful Answer:")[-1]
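
For orientation, here is a minimal standalone sketch of how the new token argument is wired up at runtime. It is not part of the commit: it assumes a local .env file containing HUGGINGFACE_API_KEY, the model id is taken from the list commented in app.py, and the sampling values are arbitrary placeholders.

# Hypothetical example mirroring the change above (not code from the commit).
# Assumes a .env file with a line like:
#   HUGGINGFACE_API_KEY=hf_xxxxxxxxxxxxxxxx
import os
from dotenv import load_dotenv
from langchain_huggingface import HuggingFaceEndpoint

_ = load_dotenv()  # load .env into the process environment
huggingfacehub_api_token = os.environ.get("HUGGINGFACE_API_KEY")

llm = HuggingFaceEndpoint(
    repo_id="mistralai/Mistral-7B-Instruct-v0.2",  # example model from the list in app.py
    temperature=0.7,        # placeholder values; app.py receives these from the Gradio UI
    max_new_tokens=1024,
    top_k=3,
    huggingfacehub_api_token=huggingfacehub_api_token,  # argument added by this commit
)

Keeping the key in .env (or as a Space secret when hosted) keeps it out of the repository. The last hunk also replaces the deprecated qa_chain(...) call style with qa_chain.invoke(...), the current LangChain convention, without changing how the returned "answer" is handled.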