Use the Hugging Face inference provider for the new Qwen model
Browse files
app.py
CHANGED
@@ -381,8 +381,8 @@ AVAILABLE_MODELS = [
|
|
381 |
},
|
382 |
{
|
383 |
"name": "Qwen3-235B-A22B-Thinking",
|
384 |
-
"id": "qwen3-235b-a22b-thinking-2507",
|
385 |
-
"description": "Qwen3-235B-A22B-Thinking model with advanced reasoning capabilities
|
386 |
}
|
387 |
]
|
388 |
|
@@ -456,23 +456,14 @@ if not HF_TOKEN:
|
|
456 |
|
457 |
def get_inference_client(model_id, provider="auto"):
|
458 |
"""Return an InferenceClient with provider based on model_id and user selection."""
|
459 |
-
# Special case for Dashscope Qwen thinking model
|
460 |
-
if model_id == "qwen3-235b-a22b-thinking-2507":
|
461 |
-
dashscope_api_key = os.getenv("DASHSCOPE_API_KEY")
|
462 |
-
if not dashscope_api_key:
|
463 |
-
raise RuntimeError("DASHSCOPE_API_KEY environment variable is not set. Please set it to your Dashscope API key.")
|
464 |
-
return OpenAI(
|
465 |
-
api_key=dashscope_api_key,
|
466 |
-
base_url="https://dashscope.aliyuncs.com/compatible-mode/v1"
|
467 |
-
)
|
468 |
-
|
469 |
-
# Handle other models with HuggingFace InferenceClient
|
470 |
if model_id == "moonshotai/Kimi-K2-Instruct":
|
471 |
provider = "groq"
|
472 |
elif model_id == "Qwen/Qwen3-235B-A22B":
|
473 |
provider = "cerebras"
|
474 |
elif model_id == "Qwen/Qwen3-32B":
|
475 |
provider = "cerebras"
|
|
|
|
|
476 |
return InferenceClient(
|
477 |
provider=provider,
|
478 |
api_key=HF_TOKEN,
|
@@ -1456,19 +1447,12 @@ This will help me create a better design for you."""
|
|
1456 |
else:
|
1457 |
messages.append({'role': 'user', 'content': enhanced_query})
|
1458 |
try:
|
1459 |
-
|
1460 |
-
|
1461 |
-
|
1462 |
-
|
1463 |
-
|
1464 |
-
|
1465 |
-
}
|
1466 |
-
|
1467 |
-
# Add stream_options for Dashscope models for better streaming performance
|
1468 |
-
if _current_model["id"] == "qwen3-235b-a22b-thinking-2507":
|
1469 |
-
completion_params["stream_options"] = {"include_usage": True}
|
1470 |
-
|
1471 |
-
completion = client.chat.completions.create(**completion_params)
|
1472 |
content = ""
|
1473 |
for chunk in completion:
|
1474 |
# Only process if chunk.choices is non-empty
|
|
|
381 |
},
|
382 |
{
|
383 |
"name": "Qwen3-235B-A22B-Thinking",
|
384 |
+
"id": "Qwen/Qwen3-235B-A22B-Thinking-2507",
|
385 |
+
"description": "Qwen3-235B-A22B-Thinking model with advanced reasoning capabilities"
|
386 |
}
|
387 |
]
|
388 |
|
|
|
456 |
|
457 |
def get_inference_client(model_id, provider="auto"):
|
458 |
"""Return an InferenceClient with provider based on model_id and user selection."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
459 |
if model_id == "moonshotai/Kimi-K2-Instruct":
|
460 |
provider = "groq"
|
461 |
elif model_id == "Qwen/Qwen3-235B-A22B":
|
462 |
provider = "cerebras"
|
463 |
elif model_id == "Qwen/Qwen3-32B":
|
464 |
provider = "cerebras"
|
465 |
+
elif model_id == "Qwen/Qwen3-235B-A22B-Thinking-2507":
|
466 |
+
provider = "auto" # Let HuggingFace handle provider selection
|
467 |
return InferenceClient(
|
468 |
provider=provider,
|
469 |
api_key=HF_TOKEN,
|
|
|
1447 |
else:
|
1448 |
messages.append({'role': 'user', 'content': enhanced_query})
|
1449 |
try:
|
1450 |
+
completion = client.chat.completions.create(
|
1451 |
+
model=_current_model["id"],
|
1452 |
+
messages=messages,
|
1453 |
+
stream=True,
|
1454 |
+
max_tokens=10000
|
1455 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1456 |
content = ""
|
1457 |
for chunk in completion:
|
1458 |
# Only process if chunk.choices is non-empty
|