akhaliq HF Staff committed on
Commit
0e5a693
·
1 Parent(s): 461f045

use hf inference provider for new qwen model

Browse files
Files changed (1) hide show
  1. app.py +10 -26
app.py CHANGED
@@ -381,8 +381,8 @@ AVAILABLE_MODELS = [
381
  },
382
  {
383
  "name": "Qwen3-235B-A22B-Thinking",
384
- "id": "qwen3-235b-a22b-thinking-2507",
385
- "description": "Qwen3-235B-A22B-Thinking model with advanced reasoning capabilities via Dashscope"
386
  }
387
  ]
388
 
@@ -456,23 +456,14 @@ if not HF_TOKEN:
456
 
457
  def get_inference_client(model_id, provider="auto"):
458
  """Return an InferenceClient with provider based on model_id and user selection."""
459
- # Special case for Dashscope Qwen thinking model
460
- if model_id == "qwen3-235b-a22b-thinking-2507":
461
- dashscope_api_key = os.getenv("DASHSCOPE_API_KEY")
462
- if not dashscope_api_key:
463
- raise RuntimeError("DASHSCOPE_API_KEY environment variable is not set. Please set it to your Dashscope API key.")
464
- return OpenAI(
465
- api_key=dashscope_api_key,
466
- base_url="https://dashscope.aliyuncs.com/compatible-mode/v1"
467
- )
468
-
469
- # Handle other models with HuggingFace InferenceClient
470
  if model_id == "moonshotai/Kimi-K2-Instruct":
471
  provider = "groq"
472
  elif model_id == "Qwen/Qwen3-235B-A22B":
473
  provider = "cerebras"
474
  elif model_id == "Qwen/Qwen3-32B":
475
  provider = "cerebras"
 
 
476
  return InferenceClient(
477
  provider=provider,
478
  api_key=HF_TOKEN,
@@ -1456,19 +1447,12 @@ This will help me create a better design for you."""
1456
  else:
1457
  messages.append({'role': 'user', 'content': enhanced_query})
1458
  try:
1459
- # Configure completion parameters based on model type
1460
- completion_params = {
1461
- "model": _current_model["id"],
1462
- "messages": messages,
1463
- "stream": True,
1464
- "max_tokens": 10000
1465
- }
1466
-
1467
- # Add stream_options for Dashscope models for better streaming performance
1468
- if _current_model["id"] == "qwen3-235b-a22b-thinking-2507":
1469
- completion_params["stream_options"] = {"include_usage": True}
1470
-
1471
- completion = client.chat.completions.create(**completion_params)
1472
  content = ""
1473
  for chunk in completion:
1474
  # Only process if chunk.choices is non-empty
 
381
  },
382
  {
383
  "name": "Qwen3-235B-A22B-Thinking",
384
+ "id": "Qwen/Qwen3-235B-A22B-Thinking-2507",
385
+ "description": "Qwen3-235B-A22B-Thinking model with advanced reasoning capabilities"
386
  }
387
  ]
388
 
 
456
 
457
  def get_inference_client(model_id, provider="auto"):
458
  """Return an InferenceClient with provider based on model_id and user selection."""
 
 
 
 
 
 
 
 
 
 
 
459
  if model_id == "moonshotai/Kimi-K2-Instruct":
460
  provider = "groq"
461
  elif model_id == "Qwen/Qwen3-235B-A22B":
462
  provider = "cerebras"
463
  elif model_id == "Qwen/Qwen3-32B":
464
  provider = "cerebras"
465
+ elif model_id == "Qwen/Qwen3-235B-A22B-Thinking-2507":
466
+ provider = "auto" # Let HuggingFace handle provider selection
467
  return InferenceClient(
468
  provider=provider,
469
  api_key=HF_TOKEN,
 
1447
  else:
1448
  messages.append({'role': 'user', 'content': enhanced_query})
1449
  try:
1450
+ completion = client.chat.completions.create(
1451
+ model=_current_model["id"],
1452
+ messages=messages,
1453
+ stream=True,
1454
+ max_tokens=10000
1455
+ )
 
 
 
 
 
 
 
1456
  content = ""
1457
  for chunk in completion:
1458
  # Only process if chunk.choices is non-empty