NCTCMumbai committed on
Update backend/query_llm.py

- backend/query_llm.py  +13 -2

backend/query_llm.py  CHANGED
@@ -8,7 +8,7 @@ from typing import Any, Dict, Generator, List
 
 from huggingface_hub import InferenceClient
 from transformers import AutoTokenizer
-
+from gradio_client import Client
 #tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")
 tokenizer = AutoTokenizer.from_pretrained("mistralai/Mixtral-8x7B-Instruct-v0.1")
 #tokenizer = AutoTokenizer.from_pretrained("mistralai/Mixtral-8x22B-Instruct-v0.1")
@@ -24,7 +24,7 @@ HF_TOKEN = getenv("HUGGING_FACE_HUB_TOKEN")
 #     token=HF_TOKEN
 # )
 
-
+client = Client("Qwen/Qwen1.5-110B-Chat-demo")
 hf_client = InferenceClient(
     "mistralai/Mixtral-8x7B-Instruct-v0.1",
     token=HF_TOKEN
@@ -105,6 +105,17 @@ def generate_hf(prompt: str, history: str, temperature: float = 0.5, max_new_tok
         gr.Warning("Unfortunately Mistral is unable to process")
         return "I do not know what happened, but I couldn't understand you."
 
+def generate_qwen(formatted_prompt: str, history: str):
+    response = client.predict(
+        query=formatted_prompt,
+        history=history,
+        system="You are a helpful assistant.",
+        api_name="/model_chat"
+    )
+    print(response)
+    return response
+
+
 
 def generate_openai(prompt: str, history: str, temperature: float = 0.9, max_new_tokens: int = 256,
                     top_p: float = 0.95, repetition_penalty: float = 1.0) -> Generator[str, None, str]:
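
For reference, a minimal sketch of how the new generate_qwen helper could be invoked; the import path, the example prompt string, and the empty starting history are assumptions for illustration, not part of this commit. The /model_chat endpoint of the Qwen/Qwen1.5-110B-Chat-demo Space returns the Space's full output (often a tuple that includes the updated chat history), so the raw value that generate_qwen prints and returns may need further unpacking by the caller.

# Hypothetical usage sketch (assumed import path; not part of this commit).
from backend.query_llm import generate_qwen

formatted_prompt = "Summarise the retrieved passages for the user's question."  # example prompt, assumed
history = []  # start with an empty chat history

response = generate_qwen(formatted_prompt, history)
# The shape of `response` depends on the Space's /model_chat endpoint;
# it may be a tuple containing the updated history rather than a plain string.
print(response)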