Clement Vachet committed
Commit ca60bef · 1 Parent(s): 58b5050

Simplify LLM list

Files changed (1)
app.py +65 -56
app.py CHANGED
@@ -23,13 +23,14 @@ import re
 
 
 # default_persist_directory = './chroma_HF/'
-list_llm = ["mistralai/Mistral-7B-Instruct-v0.2", "mistralai/Mixtral-8x7B-Instruct-v0.1", "mistralai/Mistral-7B-Instruct-v0.1", \
-    "google/gemma-7b-it","google/gemma-2b-it", \
-    "HuggingFaceH4/zephyr-7b-beta", "HuggingFaceH4/zephyr-7b-gemma-v0.1", \
-    "meta-llama/Llama-2-7b-chat-hf", "microsoft/phi-2", \
-    "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "mosaicml/mpt-7b-instruct", "tiiuae/falcon-7b-instruct", \
-    "google/flan-t5-xxl"
-]
+# list_llm = ["mistralai/Mistral-7B-Instruct-v0.2", "mistralai/Mixtral-8x7B-Instruct-v0.1", "mistralai/Mistral-7B-Instruct-v0.1", \
+#     "google/gemma-7b-it","google/gemma-2b-it", \
+#     "HuggingFaceH4/zephyr-7b-beta", "HuggingFaceH4/zephyr-7b-gemma-v0.1", \
+#     "meta-llama/Llama-2-7b-chat-hf", "microsoft/phi-2", \
+#     "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "mosaicml/mpt-7b-instruct", "tiiuae/falcon-7b-instruct", \
+#     "google/flan-t5-xxl"
+# ]
+list_llm = ["mistralai/Mistral-7B-Instruct-v0.2"]
 list_llm_simple = [os.path.basename(llm) for llm in list_llm]
 
 
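Note: list_llm_simple (presumably used to label the model selector in the Gradio UI) is derived from list_llm via os.path.basename, so trimming the list to a single repo id keeps the labels and the underlying repo ids consistent. A minimal sketch of what the comprehension above evaluates to, using only values taken from the diff:

import os

list_llm = ["mistralai/Mistral-7B-Instruct-v0.2"]
list_llm_simple = [os.path.basename(llm) for llm in list_llm]
print(list_llm_simple)  # ['Mistral-7B-Instruct-v0.2']
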
@@ -80,54 +81,63 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
     # Warning: langchain issue
     # URL: https://github.com/langchain-ai/langchain/issues/6080
 
-    WARNING - simplify LLM use
-    if llm_model == "mistralai/Mixtral-8x7B-Instruct-v0.1":
-        llm = HuggingFaceEndpoint(
-            repo_id=llm_model,
-            # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k, "load_in_8bit": True}
-            temperature = temperature,
-            max_new_tokens = max_tokens,
-            top_k = top_k,
-            load_in_8bit = True,
-        )
-    elif llm_model in ["HuggingFaceH4/zephyr-7b-gemma-v0.1","mosaicml/mpt-7b-instruct"]:
-        raise gr.Error("LLM model is too large to be loaded automatically on free inference endpoint")
-        llm = HuggingFaceEndpoint(
-            repo_id=llm_model,
-            temperature = temperature,
-            max_new_tokens = max_tokens,
-            top_k = top_k,
-        )
-    elif llm_model == "microsoft/phi-2":
-        # raise gr.Error("phi-2 model requires 'trust_remote_code=True', currently not supported by langchain HuggingFaceHub...")
-        llm = HuggingFaceEndpoint(
-            repo_id=llm_model,
-            # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k, "trust_remote_code": True, "torch_dtype": "auto"}
-            temperature = temperature,
-            max_new_tokens = max_tokens,
-            top_k = top_k,
-            trust_remote_code = True,
-            torch_dtype = "auto",
-        )
-    elif llm_model == "TinyLlama/TinyLlama-1.1B-Chat-v1.0":
-        llm = HuggingFaceEndpoint(
-            repo_id=llm_model,
-            # model_kwargs={"temperature": temperature, "max_new_tokens": 250, "top_k": top_k}
-            temperature = temperature,
-            max_new_tokens = 250,
-            top_k = top_k,
-        )
-    elif llm_model == "meta-llama/Llama-2-7b-chat-hf":
-        raise gr.Error("Llama-2-7b-chat-hf model requires a Pro subscription...")
-        llm = HuggingFaceEndpoint(
-            repo_id=llm_model,
-            # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k}
-            temperature = temperature,
-            max_new_tokens = max_tokens,
-            top_k = top_k,
-        )
-    else:
-        llm = HuggingFaceEndpoint(
+    # WARNING - simplify LLM use
+    # if llm_model == "mistralai/Mixtral-8x7B-Instruct-v0.1":
+    #     llm = HuggingFaceEndpoint(
+    #         repo_id=llm_model,
+    #         # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k, "load_in_8bit": True}
+    #         temperature = temperature,
+    #         max_new_tokens = max_tokens,
+    #         top_k = top_k,
+    #         load_in_8bit = True,
+    #     )
+    # elif llm_model in ["HuggingFaceH4/zephyr-7b-gemma-v0.1","mosaicml/mpt-7b-instruct"]:
+    #     raise gr.Error("LLM model is too large to be loaded automatically on free inference endpoint")
+    #     llm = HuggingFaceEndpoint(
+    #         repo_id=llm_model,
+    #         temperature = temperature,
+    #         max_new_tokens = max_tokens,
+    #         top_k = top_k,
+    #     )
+    # elif llm_model == "microsoft/phi-2":
+    #     # raise gr.Error("phi-2 model requires 'trust_remote_code=True', currently not supported by langchain HuggingFaceHub...")
+    #     llm = HuggingFaceEndpoint(
+    #         repo_id=llm_model,
+    #         # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k, "trust_remote_code": True, "torch_dtype": "auto"}
+    #         temperature = temperature,
+    #         max_new_tokens = max_tokens,
+    #         top_k = top_k,
+    #         trust_remote_code = True,
+    #         torch_dtype = "auto",
+    #     )
+    # elif llm_model == "TinyLlama/TinyLlama-1.1B-Chat-v1.0":
+    #     llm = HuggingFaceEndpoint(
+    #         repo_id=llm_model,
+    #         # model_kwargs={"temperature": temperature, "max_new_tokens": 250, "top_k": top_k}
+    #         temperature = temperature,
+    #         max_new_tokens = 250,
+    #         top_k = top_k,
+    #     )
+    # elif llm_model == "meta-llama/Llama-2-7b-chat-hf":
+    #     raise gr.Error("Llama-2-7b-chat-hf model requires a Pro subscription...")
+    #     llm = HuggingFaceEndpoint(
+    #         repo_id=llm_model,
+    #         # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k}
+    #         temperature = temperature,
+    #         max_new_tokens = max_tokens,
+    #         top_k = top_k,
+    #     )
+    # else:
+    #     llm = HuggingFaceEndpoint(
+    #         repo_id=llm_model,
+    #         # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k, "trust_remote_code": True, "torch_dtype": "auto"}
+    #         # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k}
+    #         temperature = temperature,
+    #         max_new_tokens = max_tokens,
+    #         top_k = top_k,
+    #     )
+
+    llm = HuggingFaceEndpoint(
         repo_id=llm_model,
         # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k, "trust_remote_code": True, "torch_dtype": "auto"}
         # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k}
@@ -135,7 +145,6 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
         max_new_tokens = max_tokens,
         top_k = top_k,
     )
-
 
     progress(0.75, desc="Defining buffer memory...")
     memory = ConversationBufferMemory(
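After this change every model path uses the single HuggingFaceEndpoint call kept at the end of the hunk, with temperature, max_new_tokens and top_k passed as direct keyword arguments instead of a model_kwargs dict (see the langchain issue linked in the comments above). As a hedged, standalone sketch only: assuming HuggingFaceEndpoint is imported from langchain_community.llms and a HUGGINGFACEHUB_API_TOKEN is set in the environment, the simplified call could be exercised roughly like this; the literal sampling values are placeholders for the slider values app.py actually passes.

from langchain_community.llms import HuggingFaceEndpoint

llm_model = "mistralai/Mistral-7B-Instruct-v0.2"  # the one entry left in list_llm

# Placeholder sampling settings; in app.py these come from the Gradio sliders.
llm = HuggingFaceEndpoint(
    repo_id=llm_model,
    temperature=0.7,
    max_new_tokens=1024,
    top_k=3,
)

print(llm.invoke("In one sentence, what is retrieval-augmented generation?"))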