// Source: piclets/src/lib/services/qwen3Client.ts
// Last commit: "switch to qwen3" (24561f7), author: Fraser, file size: 10 kB
/**
* Qwen3 Client - Drop-in replacement for rwkvClient using Qwen3 HF Space
* Compatible with existing rwkvClient.predict("/chat", [...]) API
*/
/**
 * A single turn of a conversation: the text that was said and which
 * participant said it.
 */
interface Qwen3Message {
  /** Text of the turn. */
  content: string;
  /** Author of the turn. */
  role: 'system' | 'user' | 'assistant';
}
/**
 * Settings a Qwen3 client is constructed with.
 */
interface Qwen3ClientOptions {
  /** Name of the model to request from the Space. */
  model: string;
  /** Hugging Face Space identifier, written as `owner/name`. */
  huggingFaceSpace: string;
  /** Optional credential for the Space. */
  apiKey?: string;
}
/** Generation settings handed from `predict` to the private request helpers. */
interface Qwen3GenerationOptions {
  max_new_tokens?: number;
  temperature?: number;
  top_p?: number;
  top_k?: number;
  repetition_penalty?: number;
}

/**
 * Client for a Qwen3 Hugging Face Space that exposes the same
 * `predict("/chat", [...])` call shape as the rwkvClient it replaces.
 *
 * `predict` never fails just because the Space is unreachable: it degrades to
 * locally generated placeholder text. `testConnection` deliberately bypasses
 * that fallback so it reports the real state of the service.
 */
export class Qwen3Client {
  /** Upper bound of the Space's thinking budget, expressed in thousands of tokens. */
  private static readonly MAX_THINKING_BUDGET_K = 38;
  /** Token count assumed when the caller supplies no usable `max_new_tokens`. */
  private static readonly DEFAULT_MAX_NEW_TOKENS = 2048;

  private readonly options: Qwen3ClientOptions;
  private readonly sessionId: string;

  /**
   * @param options Overrides for the defaults (Space `Qwen/Qwen3-Demo`, model `qwen3-32b`).
   */
  constructor(options: Partial<Qwen3ClientOptions> = {}) {
    this.options = {
      huggingFaceSpace: 'Qwen/Qwen3-Demo',
      model: 'qwen3-32b', // Default to Qwen3-32B for good performance/quality balance
      ...options
    };
    this.sessionId = this.generateSessionId();
  }

  /** Builds a random base-36 identifier used as the conversation id of this client. */
  private generateSessionId(): string {
    return Math.random().toString(36).substring(2, 15) + Math.random().toString(36).substring(2, 15);
  }

  /**
   * Predict method that mimics the rwkvClient.predict("/chat", [...]) API.
   *
   * @param endpoint Must be "/chat".
   * @param params Positional parameters: [message, chat_history, system_prompt,
   *   max_new_tokens, temperature, top_p, top_k, repetition_penalty]. Everything
   *   after `message` is optional.
   * @returns `{ data: [responseText] }`. When the Space cannot be reached the
   *   text is a locally generated fallback rather than a model answer.
   * @throws Error if the endpoint is not "/chat" or `message` is not a string.
   */
  async predict(endpoint: string, params: any[]): Promise<{data: any[]}> {
    if (endpoint !== '/chat') {
      throw new Error('Qwen3Client only supports "/chat" endpoint');
    }

    const [
      message,
      chat_history = [],
      system_prompt = "You are a helpful assistant.",
      max_new_tokens = 2048,
      temperature = 0.7,
      top_p = 0.95,
      top_k = 50,
      repetition_penalty = 1.0
    ] = params;

    // Fail early with a clear message instead of a TypeError from `.includes`
    // deep inside the fallback path.
    if (typeof message !== 'string') {
      throw new Error('Qwen3Client.predict: message must be a string');
    }

    try {
      const messages = this.buildMessages(message, chat_history, system_prompt);
      const response = await this.callQwen3API(messages, {
        max_new_tokens,
        temperature,
        top_p,
        top_k,
        repetition_penalty
      });
      // Return in the expected format: {data: [response_text]}
      return {
        data: [response]
      };
    } catch (error) {
      console.error('Qwen3Client error:', error);
      throw new Error(`Qwen3 API call failed: ${error}`);
    }
  }

  /**
   * Assembles the message list: optional system prompt, then each
   * `[user, assistant]` history pair, then the current user message.
   */
  private buildMessages(message: string, chatHistory: unknown, systemPrompt: unknown): Qwen3Message[] {
    const messages: Qwen3Message[] = [];

    // A non-string or blank system prompt is skipped rather than crashing on `.trim()`.
    if (typeof systemPrompt === 'string' && systemPrompt.trim()) {
      messages.push({ role: 'system', content: systemPrompt });
    }

    if (Array.isArray(chatHistory)) {
      for (const entry of chatHistory) {
        // Only the [user_message, assistant_message] pair format is understood.
        if (Array.isArray(entry) && entry.length >= 2) {
          messages.push({ role: 'user', content: entry[0] });
          messages.push({ role: 'assistant', content: entry[1] });
        }
      }
    }

    messages.push({ role: 'user', content: message });
    return messages;
  }

  /**
   * Asks the Space for a completion and, if that fails for any reason,
   * returns locally generated fallback text instead of throwing.
   */
  private async callQwen3API(messages: Qwen3Message[], options: Qwen3GenerationOptions): Promise<string> {
    try {
      return await this.requestQwen3(messages, options);
    } catch (error) {
      console.warn('Qwen3 direct API call failed, using fallback strategy:', error);
      return this.buildFallbackResponse(messages);
    }
  }

  /**
   * Performs the HTTP call against the Space and extracts the reply text.
   *
   * Only the system prompt and the last message are transmitted; earlier
   * history turns present in `messages` are not forwarded by this payload.
   *
   * @throws Error on a non-2xx status, a network failure, or a response from
   *   which no text can be extracted.
   */
  private async requestQwen3(messages: Qwen3Message[], options: Qwen3GenerationOptions): Promise<string> {
    const systemMessage = messages.find((m) => m.role === 'system')?.content ?? '';
    const userMessage = messages[messages.length - 1]?.content ?? '';

    // The direct host of a Space is its id as a single lowercase DNS label, so
    // '/', '_' and '.' are all turned into '-'.
    // NOTE(review): verify against the Hub for unusual Space names.
    const subdomain = this.options.huggingFaceSpace.toLowerCase().replace(/[^a-z0-9-]/g, '-');
    const spaceUrl = `https://${subdomain}.hf.space`;

    // Construct the API payload similar to what we see in the Qwen3-Demo
    const payload = {
      data: [
        userMessage, // input message
        {
          model: this.options.model,
          sys_prompt: systemMessage,
          thinking_budget: Qwen3Client.thinkingBudgetFor(options.max_new_tokens)
        },
        {
          enable_thinking: false // Disable for faster responses
        },
        {
          conversation_contexts: {},
          conversations: [],
          conversation_id: this.sessionId
        }
      ],
      fn_index: 0 // Function index for add_message
    };

    const headers: Record<string, string> = { 'Content-Type': 'application/json' };
    if (this.options.apiKey) {
      // Forward the configured credential so private Spaces can be reached.
      headers['Authorization'] = `Bearer ${this.options.apiKey}`;
    }

    const response = await fetch(`${spaceUrl}/api/predict`, {
      method: 'POST',
      headers,
      body: JSON.stringify(payload)
    });
    if (!response.ok) {
      throw new Error(`HTTP ${response.status}: ${response.statusText}`);
    }

    const result: unknown = await response.json();
    const text = Qwen3Client.extractResponseText(result);
    if (text === undefined) {
      throw new Error('Could not extract text from Qwen3 response');
    }
    return text;
  }

  /**
   * Converts a token count into the Space's thinking budget, clamped to
   * 1..38. The cap is expressed in thousands of tokens, so the token count is
   * scaled down before clamping.
   * NOTE(review): assumes 1k = 1024 tokens — confirm against the Space.
   */
  private static thinkingBudgetFor(maxNewTokens: unknown): number {
    const tokens =
      typeof maxNewTokens === 'number' && Number.isFinite(maxNewTokens) && maxNewTokens > 0
        ? maxNewTokens
        : Qwen3Client.DEFAULT_MAX_NEW_TOKENS;
    const budgetK = Math.ceil(tokens / 1024);
    return Math.min(Qwen3Client.MAX_THINKING_BUDGET_K, Math.max(1, budgetK));
  }

  /** Returns `value` as a plain record when it is a non-null object, otherwise `undefined`. */
  private static asRecord(value: unknown): Record<string, unknown> | undefined {
    return typeof value === 'object' && value !== null ? (value as Record<string, unknown>) : undefined;
  }

  /**
   * Walks a Gradio-style `{ data: [...] }` response looking for a chatbot
   * array whose last message carries a `{ type: 'text', content }` part.
   *
   * @returns The first non-empty text found, or `undefined` when there is none.
   */
  private static extractResponseText(result: unknown): string | undefined {
    const data = Qwen3Client.asRecord(result)?.data;
    if (!Array.isArray(data)) {
      return undefined;
    }
    for (const item of data) {
      if (!Array.isArray(item) || item.length === 0) {
        continue;
      }
      const parts = Qwen3Client.asRecord(item[item.length - 1])?.content;
      if (!Array.isArray(parts)) {
        continue;
      }
      const textPart = parts
        .map((part: unknown) => Qwen3Client.asRecord(part))
        .find((part) => part?.type === 'text');
      const text = textPart?.content;
      if (typeof text === 'string' && text.length > 0) {
        return text;
      }
    }
    return undefined;
  }

  /**
   * Development fallback: produces placeholder output shaped after what the
   * prompt appears to ask for (JSON stats, an image description, or a notice).
   */
  private buildFallbackResponse(messages: Qwen3Message[]): string {
    const userMessage = messages[messages.length - 1]?.content ?? '';
    const systemMessage = messages.find((m) => m.role === 'system')?.content ?? '';

    // If it's a JSON generation request, provide a structured response
    const wantsJson =
      userMessage.includes('JSON') || userMessage.includes('json') || systemMessage.includes('JSON');
    if (wantsJson) {
      if (userMessage.includes('monster') || userMessage.includes('stats')) {
        return this.generateFallbackMonsterStats(userMessage);
      }
      return '```json\n{"status": "Qwen3 temporarily unavailable", "using_fallback": true}\n```';
    }

    // For text generation, provide a reasonable response
    if (userMessage.includes('visual description') || userMessage.includes('image generation')) {
      return this.generateFallbackImageDescription(userMessage);
    }

    return `I understand you're asking about: "${userMessage.substring(0, 100)}..."\n\nHowever, I'm currently unable to connect to the Qwen3 service. The system will automatically fall back to an alternative model for your request.`;
  }

  /**
   * Generates randomised placeholder monster stats as a fenced JSON block.
   * Rarity keywords in the prompt ("rare"/"legendary", "common"/"basic")
   * shift the base value and spread of the rolled stats.
   */
  private generateFallbackMonsterStats(userMessage: string): string {
    const lowered = userMessage.toLowerCase();
    const isRare = lowered.includes('rare') || lowered.includes('legendary');
    const isCommon = lowered.includes('common') || lowered.includes('basic');
    const baseStats = isRare ? 70 : isCommon ? 25 : 45;
    const variation = isRare ? 25 : isCommon ? 15 : 20;

    // One stat: base ± variation/2, clamped to the 10..100 range.
    const rollStat = (): number =>
      Math.round(Math.max(10, Math.min(100, baseStats + Math.random() * variation - variation / 2)));

    const stats = {
      rarity: isRare ? 'rare' : isCommon ? 'common' : 'uncommon',
      picletType: 'beast', // Default fallback
      height: Math.round((Math.random() * 3 + 0.5) * 10) / 10,
      weight: Math.round((Math.random() * 100 + 10) * 10) / 10,
      HP: rollStat(),
      defence: rollStat(),
      attack: rollStat(),
      speed: rollStat(),
      monsterLore: "A mysterious creature discovered through advanced AI analysis. Its true nature remains to be studied.",
      specialPassiveTraitDescription: "Adaptive Resilience - This creature adapts to its environment.",
      attackActionName: "Strike",
      attackActionDescription: "A focused attack that deals moderate damage.",
      buffActionName: "Focus",
      buffActionDescription: "Increases concentration, boosting attack power temporarily.",
      debuffActionName: "Intimidate",
      debuffActionDescription: "Reduces the opponent's confidence, lowering their attack.",
      specialActionName: "Signature Move",
      specialActionDescription: "A powerful technique unique to this creature."
    };
    return '```json\n' + JSON.stringify(stats, null, 2) + '\n```';
  }

  /**
   * Generates a generic creature description from a random colour and feature.
   * The prompt itself is not inspected.
   */
  private generateFallbackImageDescription(userMessage: string): string {
    const colors = ['vibrant blue', 'emerald green', 'golden yellow', 'deep purple', 'crimson red'];
    const features = ['large expressive eyes', 'sleek form', 'distinctive markings', 'graceful limbs'];
    const color = colors[Math.floor(Math.random() * colors.length)];
    const feature = features[Math.floor(Math.random() * features.length)];
    return `A ${color} creature with ${feature}, designed in an anime-inspired style with clean lines and appealing proportions.`;
  }

  /**
   * Test connection to Qwen3 service.
   *
   * Talks to the Space directly, without the offline fallback used by
   * `predict`, so an unreachable or misbehaving service yields `false`.
   *
   * @returns `true` only when the Space answered with non-empty text.
   */
  async testConnection(): Promise<boolean> {
    try {
      const reply = await this.requestQwen3(
        [
          { role: 'system', content: 'You are a helpful assistant. Respond briefly.' },
          { role: 'user', content: 'Hello, are you working?' }
        ],
        {
          max_new_tokens: 100,
          temperature: 0.7,
          top_p: 0.95,
          top_k: 50,
          repetition_penalty: 1.0
        }
      );
      return reply.length > 0;
    } catch (error) {
      console.error('Qwen3 connection test failed:', error);
      return false;
    }
  }
}
// Ready-made client built with the default options, shared by importers
export const qwen3Client: Qwen3Client = new Qwen3Client();