Sean-Case committed
Commit · e4df9f2 · 1 Parent(s): d2ddc62

Added support for Mistral Orca

Files changed: chatfuncs/chatfuncs.py (+15 -14)

chatfuncs/chatfuncs.py CHANGED
@@ -47,7 +47,7 @@ import gradio as gr
 
 if torch.cuda.is_available():
     torch_device = "cuda"
-    gpu_layers = 
+    gpu_layers = 5
 else: torch_device = "cpu"
 
 print("Running on device:", torch_device)
@@ -76,8 +76,8 @@ reset: bool = False
     stream: bool = True
     threads: int = threads
     batch_size:int = 512
-    context_length:int = 
-    gpu_layers:int = 0#
+    context_length:int = 4096
+    gpu_layers:int = 0#5#gpu_layers
     sample = True
 
 @dataclass
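The two config changes above raise the default context window to 4096 tokens and keep GPU offloading off (`gpu_layers = 0`, with 5 noted as the value to try when CUDA is available). These fields live on the generation-settings dataclass that the loader further down unpacks with `**asdict(GenerationConfig())`. A minimal sketch of that pattern, using a trimmed-down stand-in for the real dataclass (the `threads` value here is an assumption):

```python
from dataclasses import dataclass, asdict

@dataclass
class GenerationConfig:
    # Trimmed-down stand-in for the GenerationConfig in chatfuncs.py;
    # the real class has more fields (reset, sample, ...).
    stream: bool = True
    threads: int = 8             # assumption: mirrors the module-level `threads` setting
    batch_size: int = 512
    context_length: int = 4096   # prompt + completion window given to the GGUF model
    gpu_layers: int = 0          # 0 = CPU only; e.g. 5 offloads the first layers to GPU

# asdict() flattens the dataclass into keyword arguments, which is how
# chatfuncs.py feeds these settings into ctransformers' from_pretrained.
print(asdict(GenerationConfig()))
# {'stream': True, 'threads': 8, 'batch_size': 512, 'context_length': 4096, 'gpu_layers': 0}
```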
@@ -114,13 +114,13 @@ kw_model = pipeline("feature-extraction", model="sentence-transformers/all-MiniL
 
 ## Chat models ##
 ctrans_llm = [] # Not leaded by default
-#ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/orca_mini_3B-GGML', model_type='llama', model_file='orca-mini-3b.ggmlv3.q4_0.bin')
 ctrans_llm = AutoModelForCausalLM.from_pretrained('juanjgit/orca_mini_3B-GGUF', model_type='llama', model_file='orca-mini-3b.q4_0.gguf', **asdict(GenerationConfig()))
 #ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/vicuna-13B-v1.5-16K-GGUF', model_type='llama', model_file='vicuna-13b-v1.5-16k.Q4_K_M.gguf')
 #ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/CodeUp-Llama-2-13B-Chat-HF-GGUF', model_type='llama', model_file='codeup-llama-2-13b-chat-hf.Q4_K_M.gguf')
 #ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/CodeLlama-13B-Instruct-GGUF', model_type='llama', model_file='codellama-13b-instruct.Q4_K_M.gguf')
 #ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-Instruct-v0.1-GGUF', model_type='mistral', model_file='mistral-7b-instruct-v0.1.Q4_K_M.gguf')
-#ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-OpenOrca-GGUF', model_type='mistral', model_file='mistral-7b-openorca.Q4_K_M.gguf')
+#ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-OpenOrca-GGUF', model_type='mistral', model_file='mistral-7b-openorca.Q4_K_M.gguf', **asdict(GenerationConfig()))
+#ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-OpenOrca-GGUF', model_type='mistral', model_file='mistral-7b-openorca.Q2_K.gguf', **asdict(GenerationConfig()))
 
 
 #ctokenizer = AutoTokenizer.from_pretrained(ctrans_llm)
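Note that `juanjgit/orca_mini_3B-GGUF` remains the model that actually gets loaded; the two Mistral-7B-OpenOrca lines are added only as commented-out alternatives (Q4_K_M and the smaller Q2_K quantisation), now with the same `**asdict(GenerationConfig())` settings as the active line. As a rough, illustrative sketch of what enabling one of them looks like with ctransformers (the prompt text and the explicit keyword values are placeholders, not taken from the repo):

```python
from ctransformers import AutoModelForCausalLM

# Mirrors the commented-out Q2_K line from the diff, with the config passed
# as explicit keyword arguments instead of **asdict(GenerationConfig()).
ctrans_llm = AutoModelForCausalLM.from_pretrained(
    'TheBloke/Mistral-7B-OpenOrca-GGUF',
    model_type='mistral',
    model_file='mistral-7b-openorca.Q2_K.gguf',
    context_length=4096,  # matches the GenerationConfig change above
    gpu_layers=0,         # CPU only; set >0 to offload layers when CUDA is available
)

# ctransformers models are called directly; stream=True yields text chunks as they decode.
for chunk in ctrans_llm("Hello, who are you?", max_new_tokens=64, stream=True):
    print(chunk, end="", flush=True)
```

In practice the prompt would be wrapped in the ChatML-style template this commit adds further down, rather than passed as bare text.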
@@ -222,16 +222,14 @@ def create_prompt_templates():
 
 ### Response:"""
 
-    instruction_prompt_template_orca_input = """
-    ### System:
-    You are an AI assistant that follows instruction extremely well. Help as much as you can.
-    ### User:
-    Answer the QUESTION using information from the following input.
-    ### Input:
-    {summaries}
-    QUESTION: {question}
-
 
+    instruction_prompt_mistral_orca = """<|im_start|>system\n
+    You are an AI assistant that follows instruction extremely well. Help as much as you can.
+    <|im_start|>user\n
+    Answer the QUESTION using information from the following CONTENT.
+    CONTENT: {summaries}
+    QUESTION: {question}\n
+    <|im_end|>"""
 
 
 
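Mistral-7B-OpenOrca expects ChatML-style turn markers (`<|im_start|>role ... <|im_end|>`), which is why the new template replaces the `### System:` / `### User:` / `### Input:` headers of the removed Orca template. A hedged sketch of how the `{summaries}` and `{question}` slots get filled, using plain `str.format`; whether the repository routes this through additional prompt machinery is not visible in this diff:

```python
# Template text copied from the added lines above (including the literal \n escapes).
instruction_prompt_mistral_orca = """<|im_start|>system\n
You are an AI assistant that follows instruction extremely well. Help as much as you can.
<|im_start|>user\n
Answer the QUESTION using information from the following CONTENT.
CONTENT: {summaries}
QUESTION: {question}\n
<|im_end|>"""

# Illustrative values; in chatfuncs.py `summaries` would come from retrieved document text.
prompt = instruction_prompt_mistral_orca.format(
    summaries="Mistral 7B OpenOrca is a Mistral-7B fine-tune on the OpenOrca dataset.",
    question="Which base model does Mistral 7B OpenOrca build on?",
)
print(prompt)
```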
@@ -986,6 +984,9 @@ def _get_chat_history(chat_history: List[Tuple[str, str]], max_memory_length:int
 
 def add_inputs_answer_to_history(user_message, history, current_topic):
 
+    if history is None:
+        history = [("","")]
+
     #history.append((user_message, [-1]))
 
     chat_history_str, chat_history_first_q, chat_history_first_ans, max_memory_length = _get_chat_history(history)
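The final hunk makes `add_inputs_answer_to_history` tolerant of the chat history arriving as `None` (for example before any exchange has happened), seeding it with an empty turn instead of letting `_get_chat_history` iterate over `None`. A minimal stand-alone illustration of the pattern; both function bodies here are stubs, not the repository's implementations:

```python
def _get_chat_history(chat_history, max_memory_length: int = 3):
    # Stub: the real helper walks over (question, answer) tuples and builds several
    # strings; the point is simply that None is not iterable.
    return " ".join(q for q, _ in chat_history[-max_memory_length:])

def add_inputs_answer_to_history(user_message, history, current_topic):
    if history is None:        # history may arrive as None before the first exchange
        history = [("", "")]   # seed with an empty turn so the helper can iterate
    return _get_chat_history(history)

print(repr(add_inputs_answer_to_history("hello", None, "")))  # '' instead of a TypeError
```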