NCTCMumbai committed on
Commit 42c7844 · verified · 1 Parent(s): c5575ca

Update backend/query_llm.py

Files changed (1)
  1. backend/query_llm.py +13 -2
backend/query_llm.py CHANGED
@@ -8,7 +8,7 @@ from typing import Any, Dict, Generator, List
 
 from huggingface_hub import InferenceClient
 from transformers import AutoTokenizer
-
+from gradio_client import Client
 #tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")
 tokenizer = AutoTokenizer.from_pretrained("mistralai/Mixtral-8x7B-Instruct-v0.1")
 #tokenizer = AutoTokenizer.from_pretrained("mistralai/Mixtral-8x22B-Instruct-v0.1")
@@ -24,7 +24,7 @@ HF_TOKEN = getenv("HUGGING_FACE_HUB_TOKEN")
 # token=HF_TOKEN
 # )
 
-
+client = Client("Qwen/Qwen1.5-110B-Chat-demo")
 hf_client = InferenceClient(
     "mistralai/Mixtral-8x7B-Instruct-v0.1",
     token=HF_TOKEN
@@ -105,6 +105,17 @@ def generate_hf(prompt: str, history: str, temperature: float = 0.5, max_new_tok
         gr.Warning("Unfortunately Mistral is unable to process")
         return "I do not know what happened, but I couldn't understand you."
 
+def generate_qwen(formatted_prompt: str, history: str):
+    response = client.predict(
+        query=formatted_prompt,
+        history=history,
+        system="You are a helpful assistant.",
+        api_name="/model_chat"
+    )
+    print(response)
+    return response
+
+
 
 def generate_openai(prompt: str, history: str, temperature: float = 0.9, max_new_tokens: int = 256,
                     top_p: float = 0.95, repetition_penalty: float = 1.0) -> Generator[str, None, str]:
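
For reference, a minimal usage sketch of the new generate_qwen helper added in this commit (the sketch itself is not part of the commit). It assumes gradio_client is installed and the Qwen/Qwen1.5-110B-Chat-demo Space is reachable; the exact structure of the value returned by the Space's /model_chat endpoint is not shown in the diff, so the example treats it as opaque and simply prints it, and the question passed in is a hypothetical placeholder.

    # Sketch: call the Qwen demo Space the same way the committed helper does.
    from gradio_client import Client

    client = Client("Qwen/Qwen1.5-110B-Chat-demo")

    def generate_qwen(formatted_prompt: str, history: list):
        # Same call pattern as the committed function: keyword arguments plus
        # the named /model_chat endpoint on the Space.
        response = client.predict(
            query=formatted_prompt,
            history=history,
            system="You are a helpful assistant.",
            api_name="/model_chat",
        )
        return response

    if __name__ == "__main__":
        # Start with an empty chat history; the Space returns the updated state.
        result = generate_qwen("What does this repository's backend do?", [])
        print(result)  # inspect the raw response structure returned by the Space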