// Source: Hugging Face Space "Fraser/piclets" (status: Running). File size: 11,238 bytes.

/**
 * Qwen3 Client - Drop-in replacement for rwkvClient using Qwen3 HF Space
 * Compatible with existing rwkvClient.predict("/chat", [...]) API
 * Uses proper Gradio Client connection instead of direct HTTP calls
 */

/**
 * One turn of a chat exchange: who spoke and what was said.
 */
type Qwen3Message = {
  /** Author of the turn. */
  role: 'system' | 'user' | 'assistant';
  /** Text of the turn. */
  content: string;
};

/**
 * Construction-time settings for the Qwen3 client.
 */
type Qwen3ClientOptions = {
  /** Identifier of the Hugging Face Space the Gradio client connects to. */
  huggingFaceSpace: string;
  /** Model name sent to the Space as part of the request settings. */
  model: string;
  /** Optional API key; accepted here but not read by the client code in this file. */
  apiKey?: string;
};

/** Generation settings handed from `predict` to the Space request. */
interface Qwen3CallOptions {
  sys_prompt?: string;
  model?: string;
  max_new_tokens?: number;
  temperature?: number;
  top_p?: number;
  top_k?: number;
  repetition_penalty?: number;
}

/**
 * Client for a Qwen3 Hugging Face Space that exposes the same
 * `predict("/chat", [...])` call shape as the rwkvClient it replaces.
 *
 * `predict` never surfaces transport failures for string messages: when the
 * Space call fails it returns locally generated fallback text instead.
 * `testConnection` deliberately bypasses that fallback so it reports the real
 * state of the service.
 */
export class Qwen3Client {
  private options: Qwen3ClientOptions;
  private sessionId: string;
  // Kept as `any`: the client type comes from a dynamically imported module.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  private gradioClient: any = null;

  /**
   * @param options Overrides for the default Space and model. Properties that
   *   are missing or explicitly `undefined` fall back to the defaults.
   */
  constructor(options: Partial<Qwen3ClientOptions> = {}) {
    this.options = {
      ...options,
      // `??` (rather than relying on spread order) keeps an explicit
      // `undefined` from wiping out a default.
      huggingFaceSpace: options.huggingFaceSpace ?? 'Qwen/Qwen3-Demo',
      model: options.model ?? 'qwen2.5-72b-instruct'
    };
    this.sessionId = this.generateSessionId();
  }

  /** Builds a random base-36 identifier used as the conversation id. */
  private generateSessionId(): string {
    return Math.random().toString(36).substring(2, 15) + Math.random().toString(36).substring(2, 15);
  }

  /**
   * Connects the Gradio client to the configured Space (no-op if already connected).
   * @throws Error when the module cannot be loaded or the connection fails.
   */
  private async initializeGradioClient(): Promise<void> {
    if (this.gradioClient) {
      return; // Already initialized
    }

    try {
      // Dynamic import so the dependency is only loaded when first needed
      const { Client } = await import('@gradio/client');

      console.log(`🔗 Connecting to ${this.options.huggingFaceSpace}...`);
      this.gradioClient = await Client.connect(this.options.huggingFaceSpace);

      console.log(`✅ Connected to Qwen3 space: ${this.options.huggingFaceSpace}`);
    } catch (error) {
      console.error('Failed to initialize Qwen3 Gradio Client:', error);
      throw new Error(`Could not connect to Qwen3 space: ${error}`);
    }
  }

  /**
   * Predict method that mimics the rwkvClient.predict("/chat", [...]) API.
   *
   * @param endpoint Must be "/chat".
   * @param params Positional parameters: [message, chat_history, system_prompt,
   *   max_new_tokens, temperature, top_p, top_k, repetition_penalty].
   *   `chat_history` is accepted for call compatibility but is not forwarded
   *   to the Space.
   * @returns `{ data: [responseText] }`. If the Space call fails, `responseText`
   *   is locally generated fallback text.
   * @throws Error for an unsupported endpoint, a non-string message, or a
   *   failure to connect the Gradio client.
   */
  async predict(endpoint: string, params: any[]): Promise<{data: any[]}> {
    if (endpoint !== '/chat') {
      throw new Error('Qwen3Client only supports "/chat" endpoint');
    }

    const [
      message,
      , // chat_history: intentionally unused
      system_prompt = "You are a helpful assistant.",
      max_new_tokens = 2048,
      temperature = 0.7,
      top_p = 0.95,
      top_k = 50,
      repetition_penalty = 1.0
    ] = params;

    // Fail with a clear message instead of a TypeError from deep inside the
    // fallback path (which calls string methods on the message).
    if (typeof message !== 'string') {
      throw new Error('Qwen3 API call failed: message (params[0]) must be a string');
    }

    try {
      // Ensure Gradio client is initialized
      await this.initializeGradioClient();

      const response = await this.callQwen3API(message, {
        sys_prompt: system_prompt,
        model: this.options.model,
        max_new_tokens,
        temperature,
        top_p,
        top_k,
        repetition_penalty
      });

      // Return in the expected format: {data: [response_text]}
      return {
        data: [response]
      };

    } catch (error) {
      console.error('Qwen3Client error:', error);
      throw new Error(`Qwen3 API call failed: ${error}`);
    }
  }

  /**
   * Sends the message to the Space and, if anything goes wrong, substitutes a
   * locally generated fallback response instead of throwing.
   */
  private async callQwen3API(message: string, options: Qwen3CallOptions): Promise<string> {
    try {
      return await this.requestCompletion(message, options);
    } catch (error) {
      console.warn('Qwen3 Gradio API call failed, using fallback strategy:', error);
      return this.buildFallbackResponse(message, options);
    }
  }

  /**
   * Performs the actual `/add_message` call and returns the assistant text.
   * @throws Error when the client is not initialized, the call fails, or no
   *   assistant text can be found in the result.
   */
  private async requestCompletion(message: string, options: Qwen3CallOptions): Promise<string> {
    if (!this.gradioClient) {
      throw new Error('Gradio client not initialized');
    }

    // `??` so that legitimate falsy values (temperature 0, top_k 0, an empty
    // system prompt) are sent as given rather than replaced by defaults.
    const settingsFormValue = {
      model: options.model ?? this.options.model,
      sys_prompt: options.sys_prompt ?? "You are a helpful assistant.",
      thinking_budget: this.toThinkingBudget(options.max_new_tokens),
      temperature: options.temperature ?? 0.7,
      top_p: options.top_p ?? 0.95,
      top_k: options.top_k ?? 50,
      repetition_penalty: options.repetition_penalty ?? 1.0
    };

    // Thinking button state - disable for faster responses
    const thinkingBtnState = {
      enable_thinking: false
    };

    // Fresh conversation state keyed by this client's session id
    const stateValue = {
      conversation_contexts: {},
      conversations: [],
      conversation_id: this.sessionId
    };

    console.log(`🤖 Calling Qwen3 add_message with: "${message.substring(0, 50)}..."`);

    // Argument order: input_value, settings_form_value, thinking_btn_state_value, state_value
    const result = await this.gradioClient.predict("/add_message", [
      message,
      settingsFormValue,
      thinkingBtnState,
      stateValue
    ]);

    console.log('🔍 Raw Qwen3 response:', result);

    const text = this.extractAssistantText(result);
    if (text === undefined) {
      throw new Error('Could not extract text response from Qwen3 API result');
    }

    console.log('✅ Extracted Qwen3 response:', text.substring(0, 100) + '...');
    return text;
  }

  /**
   * Converts a token limit into the Space's thinking budget, clamped to 1..38.
   * A missing or NaN limit yields the default budget of 20.
   *
   * NOTE(review): assumes the budget is expressed in units of 1024 tokens
   * ("38k" maximum) — confirm against the Space's app.py.
   */
  private toThinkingBudget(maxNewTokens: number | undefined): number {
    if (maxNewTokens == null || Number.isNaN(maxNewTokens)) {
      return 20;
    }
    const thousands = Math.ceil(maxNewTokens / 1024);
    return Math.min(Math.max(thousands, 1), 38);
  }

  /**
   * Scans the result's `data` entries for a message list whose last entry is an
   * assistant message, and returns its text (either a plain string or the first
   * non-empty `type: 'text'` part of a structured content array).
   * @returns The text, or `undefined` when nothing suitable is found.
   */
  private extractAssistantText(result: { data?: unknown } | null | undefined): string | undefined {
    const data = result?.data;
    if (!Array.isArray(data)) {
      return undefined;
    }

    for (const component of data) {
      if (!Array.isArray(component) || component.length === 0) {
        continue;
      }

      const lastMessage = component[component.length - 1];
      if (!lastMessage || lastMessage.role !== 'assistant' || !lastMessage.content) {
        continue;
      }

      if (typeof lastMessage.content === 'string') {
        return lastMessage.content;
      }

      if (Array.isArray(lastMessage.content)) {
        for (const part of lastMessage.content) {
          if (part && part.type === 'text' && typeof part.content === 'string' && part.content) {
            return part.content;
          }
        }
      }
    }

    return undefined;
  }

  /**
   * Chooses a locally generated stand-in response based on keywords in the
   * message and system prompt.
   */
  private buildFallbackResponse(message: string, options: Qwen3CallOptions): string {
    // JSON-style requests get a structured response
    if (message.includes('JSON') || message.includes('json') || options.sys_prompt?.includes('JSON')) {
      if (message.includes('monster') || message.includes('stats')) {
        return this.generateFallbackMonsterStats(message);
      }
      return '```json\n{"status": "Qwen3 temporarily unavailable", "using_fallback": true}\n```';
    }

    // Image-description requests get a generic visual description
    if (message.includes('visual description') || message.includes('image generation')) {
      return this.generateFallbackImageDescription(message);
    }

    return `I understand you're asking about: "${message.substring(0, 100)}..."\n\nHowever, I'm currently unable to connect to the Qwen3 service. The system will automatically fall back to an alternative model for your request.`;
  }

  /**
   * Produces a fenced JSON block of randomized monster stats. Rarity (and with
   * it the stat range) is picked from keywords in the user message.
   */
  private generateFallbackMonsterStats(userMessage: string): string {
    const lowered = userMessage.toLowerCase();
    const isRare = lowered.includes('rare') || lowered.includes('legendary');
    const isCommon = lowered.includes('common') || lowered.includes('basic');

    const baseStats = isRare ? 70 : isCommon ? 25 : 45;
    const variation = isRare ? 25 : isCommon ? 15 : 20;

    // Base value plus a random offset in [-variation/2, +variation/2), clamped to 10..100
    const rollStat = (): number =>
      Math.round(Math.max(10, Math.min(100, baseStats + Math.random() * variation - variation / 2)));

    const stats = {
      rarity: isRare ? 'rare' : isCommon ? 'common' : 'uncommon',
      picletType: 'beast', // Default fallback
      height: Math.round((Math.random() * 3 + 0.5) * 10) / 10,
      weight: Math.round((Math.random() * 100 + 10) * 10) / 10,
      HP: rollStat(),
      defence: rollStat(),
      attack: rollStat(),
      speed: rollStat(),
      monsterLore: "A mysterious creature discovered through advanced AI analysis. Its true nature remains to be studied.",
      specialPassiveTraitDescription: "Adaptive Resilience - This creature adapts to its environment.",
      attackActionName: "Strike",
      attackActionDescription: "A focused attack that deals moderate damage.",
      buffActionName: "Focus",
      buffActionDescription: "Increases concentration, boosting attack power temporarily.",
      debuffActionName: "Intimidate",
      debuffActionDescription: "Reduces the opponent's confidence, lowering their attack.",
      specialActionName: "Signature Move",
      specialActionDescription: "A powerful technique unique to this creature."
    };

    return '```json\n' + JSON.stringify(stats, null, 2) + '\n```';
  }

  /**
   * Produces a generic creature description with a randomly chosen color and
   * feature. The message is accepted for signature symmetry but not inspected.
   */
  private generateFallbackImageDescription(_userMessage: string): string {
    const colors = ['vibrant blue', 'emerald green', 'golden yellow', 'deep purple', 'crimson red'];
    const features = ['large expressive eyes', 'sleek form', 'distinctive markings', 'graceful limbs'];

    const color = colors[Math.floor(Math.random() * colors.length)];
    const feature = features[Math.floor(Math.random() * features.length)];

    return `A ${color} creature with ${feature}, designed in an anime-inspired style with clean lines and appealing proportions.`;
  }

  /**
   * Test connection to Qwen3 service.
   *
   * Sends a short probe message directly through the non-fallback request
   * path, so a failing Space is reported as a failure rather than being masked
   * by locally generated fallback text.
   *
   * @returns `true` only when the Space returned a non-empty response.
   */
  async testConnection(): Promise<boolean> {
    try {
      await this.initializeGradioClient();

      const response = await this.requestCompletion(
        'Hello, are you working? Please respond with just "Yes" if you can receive this message.',
        {
          sys_prompt: 'You are a helpful assistant. Respond very briefly with just "Yes" if you can receive messages.',
          model: this.options.model,
          max_new_tokens: 50, // Small token limit for test
          temperature: 0.7,
          top_p: 0.95,
          top_k: 50,
          repetition_penalty: 1.0
        }
      );

      const isWorking = response.length > 0;

      console.log(`🔍 Qwen3 connection test result: ${isWorking ? 'PASS' : 'FAIL'}`);
      console.log(`📝 Test response: "${response.substring(0, 50)}..."`);

      return isWorking;
    } catch (error) {
      console.error('Qwen3 connection test failed:', error);
      return false;
    }
  }
}

// Shared client instance constructed with the default Space and model
export const qwen3Client: Qwen3Client = new Qwen3Client();