RefalMachine committed on
Commit
555ac42
·
verified ·
1 Parent(s): 37a3c87

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -6
app.py CHANGED
@@ -9,15 +9,15 @@ import requests
9
  from openai import OpenAI, AsyncOpenAI
10
 
11
  clients = {
12
- '32B (work in progress)': [AsyncOpenAI(api_key='123', base_url=os.getenv('MODEL_NAME_OR_PATH_32B')), os.getenv('MODEL_NAME_32B')],
13
- '32B QWQ (experimental, without any additional tuning after LEP!)': [AsyncOpenAI(api_key='123', base_url=os.getenv('MODEL_NAME_OR_PATH_QWQ')), os.getenv('MODEL_NAME_QWQ')],
14
- '7B (work in progress)': [AsyncOpenAI(api_key='123', base_url=os.getenv('MODEL_NAME_OR_PATH_7B')), 'RefalMachine/ruadapt_qwen2.5_7B_ext_u48_instruct'],
15
- '3B': [AsyncOpenAI(api_key='123', base_url=os.getenv('MODEL_NAME_OR_PATH_3B')), 'RefalMachine/ruadapt_qwen2.5_3B_ext_u48_instruct_v4']
16
  }
17
  #client = InferenceClient(os.getenv('MODEL_NAME_OR_PATH'))
18
 
19
 
20
- async def respond(
21
  message,
22
  history: list[tuple[str, str]],
23
  model_name,
@@ -41,7 +41,7 @@ async def respond(
41
 
42
  response = ""
43
 
44
- res = await clients[model_name][0].chat.completions.create(
45
  model=clients[model_name][1],
46
  messages=messages,
47
  temperature=temperature,
@@ -81,6 +81,7 @@ demo = gr.ChatInterface(
81
  ),
82
  gr.Slider(minimum=0.9, maximum=1.5, value=1.05, step=0.05, label="repetition_penalty"),
83
  ],
 
84
  )
85
 
86
 
 
9
  from openai import OpenAI, AsyncOpenAI
10
 
11
  clients = {
12
+ '32B (work in progress)': [OpenAI(api_key='123', base_url=os.getenv('MODEL_NAME_OR_PATH_32B')), os.getenv('MODEL_NAME_32B')],
13
+ '32B QWQ (experimental, without any additional tuning after LEP!)': [OpenAI(api_key='123', base_url=os.getenv('MODEL_NAME_OR_PATH_QWQ')), os.getenv('MODEL_NAME_QWQ')],
14
+ '7B (work in progress)': [OpenAI(api_key='123', base_url=os.getenv('MODEL_NAME_OR_PATH_7B')), 'RefalMachine/ruadapt_qwen2.5_7B_ext_u48_instruct'],
15
+ '3B': [OpenAI(api_key='123', base_url=os.getenv('MODEL_NAME_OR_PATH_3B')), 'RefalMachine/ruadapt_qwen2.5_3B_ext_u48_instruct_v4']
16
  }
17
  #client = InferenceClient(os.getenv('MODEL_NAME_OR_PATH'))
18
 
19
 
20
+ def respond(
21
  message,
22
  history: list[tuple[str, str]],
23
  model_name,
 
41
 
42
  response = ""
43
 
44
+ res = clients[model_name][0].chat.completions.create(
45
  model=clients[model_name][1],
46
  messages=messages,
47
  temperature=temperature,
 
81
  ),
82
  gr.Slider(minimum=0.9, maximum=1.5, value=1.05, step=0.05, label="repetition_penalty"),
83
  ],
84
+ concurrency_limit=10
85
  )
86
 
87