Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -9,15 +9,15 @@ import requests
|
|
9 |
from openai import OpenAI, AsyncOpenAI
|
10 |
|
11 |
clients = {
|
12 |
-
'32B (work in progress)': [
|
13 |
-
'32B QWQ (experimental, without any additional tuning after LEP!)': [
|
14 |
-
'7B (work in progress)': [
|
15 |
-
'3B': [
|
16 |
}
|
17 |
#client = InferenceClient(os.getenv('MODEL_NAME_OR_PATH'))
|
18 |
|
19 |
|
20 |
-
|
21 |
message,
|
22 |
history: list[tuple[str, str]],
|
23 |
model_name,
|
@@ -41,7 +41,7 @@ async def respond(
|
|
41 |
|
42 |
response = ""
|
43 |
|
44 |
-
res =
|
45 |
model=clients[model_name][1],
|
46 |
messages=messages,
|
47 |
temperature=temperature,
|
@@ -81,6 +81,7 @@ demo = gr.ChatInterface(
|
|
81 |
),
|
82 |
gr.Slider(minimum=0.9, maximum=1.5, value=1.05, step=0.05, label="repetition_penalty"),
|
83 |
],
|
|
|
84 |
)
|
85 |
|
86 |
|
|
|
9 |
from openai import OpenAI, AsyncOpenAI
|
10 |
|
11 |
clients = {
|
12 |
+
'32B (work in progress)': [OpenAI(api_key='123', base_url=os.getenv('MODEL_NAME_OR_PATH_32B')), os.getenv('MODEL_NAME_32B')],
|
13 |
+
'32B QWQ (experimental, without any additional tuning after LEP!)': [OpenAI(api_key='123', base_url=os.getenv('MODEL_NAME_OR_PATH_QWQ')), os.getenv('MODEL_NAME_QWQ')],
|
14 |
+
'7B (work in progress)': [OpenAI(api_key='123', base_url=os.getenv('MODEL_NAME_OR_PATH_7B')), 'RefalMachine/ruadapt_qwen2.5_7B_ext_u48_instruct'],
|
15 |
+
'3B': [OpenAI(api_key='123', base_url=os.getenv('MODEL_NAME_OR_PATH_3B')), 'RefalMachine/ruadapt_qwen2.5_3B_ext_u48_instruct_v4']
|
16 |
}
|
17 |
#client = InferenceClient(os.getenv('MODEL_NAME_OR_PATH'))
|
18 |
|
19 |
|
20 |
+
def respond(
|
21 |
message,
|
22 |
history: list[tuple[str, str]],
|
23 |
model_name,
|
|
|
41 |
|
42 |
response = ""
|
43 |
|
44 |
+
res = clients[model_name][0].chat.completions.create(
|
45 |
model=clients[model_name][1],
|
46 |
messages=messages,
|
47 |
temperature=temperature,
|
|
|
81 |
),
|
82 |
gr.Slider(minimum=0.9, maximum=1.5, value=1.05, step=0.05, label="repetition_penalty"),
|
83 |
],
|
84 |
+
concurrency_limit=10
|
85 |
)
|
86 |
|
87 |
|