/**
 * Text Generation Client Manager
 * Provides unified interface for text generation with automatic fallback
 * Primary: Qwen3 (Qwen/Qwen3-Demo), Fallback: Zephyr-7B (Fraser/zephyr-7b)
 */

import { qwen3Client } from './qwen3Client';

/** Display name used in logs/status for the primary client. */
const PRIMARY_CLIENT_NAME = 'Qwen3';
/** Display name used in logs/status for the fallback client. */
const FALLBACK_CLIENT_NAME = 'Zephyr-7B';

/** Maximum time to wait for the primary client's connection test. */
const CONNECTION_TEST_TIMEOUT_MS = 15000;
/** Maximum time to wait for the secondary validation request. */
const VALIDATION_TIMEOUT_MS = 10000;

/**
 * Minimal contract a text generation backend must fulfil to be managed here.
 */
interface TextGenerationClient {
  /** Sends `params` to `endpoint` and resolves with the backend's response payload. */
  predict(endpoint: string, params: any[]): Promise<{ data: any[] }>;
  /**
   * Optional health check. A truthy result means the client is usable.
   * NOTE(review): typed as boolean based on how the result is consumed below;
   * confirm against the implementation in ./qwen3Client.
   */
  testConnection?(): Promise<boolean>;
}

/**
 * Races `work` against a timer and rejects with `new Error(message)` if the
 * timer fires first. The timer is always cleared once the race settles so no
 * stray timeout is left pending after `work` completes.
 */
function withTimeout<T>(work: Promise<T>, ms: number, message: string): Promise<T> {
  let timer: ReturnType<typeof setTimeout> | undefined;
  const timeout = new Promise<never>((_, reject) => {
    timer = setTimeout(() => reject(new Error(message)), ms);
  });
  return Promise.race([work, timeout]).finally(() => {
    if (timer !== undefined) {
      clearTimeout(timer);
    }
  });
}

/**
 * Routes text generation requests to the primary client (Qwen3) and switches
 * to a caller-supplied fallback client when the primary is unavailable or a
 * request fails.
 */
class TextGenerationManager {
  private primaryClient: TextGenerationClient;
  private fallbackClient: TextGenerationClient | null = null;
  private useQwen3 = true;
  private connectionTested = false;
  /** In-flight initialization, shared so concurrent callers run one test only. */
  private initializing: Promise<void> | null = null;

  constructor() {
    this.primaryClient = qwen3Client;
  }

  /**
   * Set the fallback client (Zephyr-7B)
   */
  setFallbackClient(client: TextGenerationClient): void {
    this.fallbackClient = client;
  }

  /**
   * Test connection and determine which client to use.
   *
   * Does nothing if a test has already completed. Concurrent calls share a
   * single in-flight test instead of each starting their own. Never rejects:
   * any failure simply selects the fallback.
   */
  async initialize(): Promise<void> {
    if (this.connectionTested) return;

    if (this.initializing === null) {
      this.initializing = this.runConnectionTest().finally(() => {
        this.initializing = null;
      });
    }
    await this.initializing;
  }

  /**
   * Runs the primary client's connection test (if it has one) followed by the
   * secondary validation, updating `useQwen3` accordingly, then marks the
   * connection as tested.
   */
  private async runConnectionTest(): Promise<void> {
    console.log('🔍 Testing Qwen3 connection with improved verification...');

    try {
      // Clients without a testConnection method keep the current selection.
      if (this.primaryClient.testConnection) {
        const qwen3Available = await withTimeout(
          this.primaryClient.testConnection(),
          CONNECTION_TEST_TIMEOUT_MS,
          'Connection test timeout'
        );

        if (qwen3Available) {
          console.log('✅ Qwen3 client is available and will be used for text generation');
          this.useQwen3 = true;

          // Perform a secondary validation test to ensure real functionality
          await this.performSecondaryValidation();
        } else {
          console.log('⚠️ Qwen3 client is not available, falling back to Zephyr-7B');
          this.useQwen3 = false;
        }
      }
    } catch (error) {
      console.error('Failed to test Qwen3 connection:', error);
      console.log('⚠️ Falling back to Zephyr-7B due to connection error');
      this.useQwen3 = false;
    }

    this.connectionTested = true;
  }

  /**
   * Perform secondary validation to ensure Qwen3 is actually working.
   *
   * Sends a tiny chat request and accepts any non-empty string reply that
   * does not contain the known "unavailable" markers. The exact reply text is
   * intentionally not compared. On an invalid reply, an error, or a timeout
   * the manager switches to the fallback. Never rejects.
   */
  private async performSecondaryValidation(): Promise<void> {
    try {
      console.log('🔧 Performing secondary validation of Qwen3 functionality...');

      // Test with a specific request that should return predictable content
      const validationResult = await withTimeout(
        this.primaryClient.predict('/chat', [
          'Respond with exactly this text: "VALIDATION_SUCCESS"',
          [],
          'You are a helpful assistant. Follow instructions exactly as given.',
          20, // Very small token limit
          0.1, // Low temperature for deterministic output
          0.9,
          10,
          1.0
        ]),
        VALIDATION_TIMEOUT_MS,
        'Validation timeout'
      );

      const response: unknown = validationResult?.data?.[0];
      const isValidResponse =
        typeof response === 'string' &&
        response.length > 0 &&
        !response.includes('temporarily unavailable') &&
        !response.includes('using_fallback');

      if (!isValidResponse) {
        console.warn('⚠️ Secondary validation failed - Qwen3 responses seem invalid, switching to fallback');
        this.useQwen3 = false;
      } else {
        console.log('✅ Secondary validation passed - Qwen3 is fully functional');
      }
    } catch (error) {
      console.warn('⚠️ Secondary validation failed with error, switching to fallback:', error);
      this.useQwen3 = false;
    }
  }

  /**
   * True when requests will actually be served by the primary client: either
   * it is selected, or the fallback is selected but none has been registered.
   */
  private isPrimaryActive(): boolean {
    return this.useQwen3 || this.fallbackClient === null;
  }

  /**
   * Get the active client for text generation.
   * Falls back to the primary client (with a warning) when the fallback is
   * selected but has not been registered.
   */
  private getActiveClient(): TextGenerationClient {
    if (this.useQwen3) {
      return this.primaryClient;
    }
    if (this.fallbackClient) {
      return this.fallbackClient;
    }
    console.warn('No fallback client available, using Qwen3 client');
    return this.primaryClient;
  }

  /**
   * Predict method with automatic fallback.
   *
   * Ensures the connection test has run, sends the request to the active
   * client and, if the primary client fails while a fallback is registered,
   * retries once on the fallback and keeps using it for later calls.
   *
   * @param endpoint - Endpoint name passed through to the client.
   * @param params - Positional parameters passed through to the client.
   * @returns The response of whichever client served the request.
   * @throws The client's own error when no fallback attempt is possible, or
   *   an `Error` stating that both clients failed.
   */
  async predict(endpoint: string, params: any[]): Promise<{ data: any[] }> {
    // Ensure initialization has been attempted
    if (!this.connectionTested) {
      await this.initialize();
    }

    // Capture the routing decision before awaiting so that the label and the
    // failure handling below describe the client that was really called.
    const usedPrimary = this.isPrimaryActive();
    const activeClient = this.getActiveClient();
    const clientName = usedPrimary ? PRIMARY_CLIENT_NAME : FALLBACK_CLIENT_NAME;

    console.log(`🤖 Using ${clientName} for text generation`);

    try {
      return await activeClient.predict(endpoint, params);
    } catch (error) {
      console.error(`${clientName} prediction failed:`, error);

      // If primary client fails and we have a fallback, try it
      const fallback = this.fallbackClient;
      if (usedPrimary && fallback) {
        console.log('🔄 Qwen3 failed, trying fallback to Zephyr-7B...');
        try {
          const fallbackResult = await fallback.predict(endpoint, params);
          // Mark for future calls to use fallback
          this.useQwen3 = false;
          return fallbackResult;
        } catch (fallbackError) {
          console.error('Fallback client also failed:', fallbackError);
          throw new Error(`Both primary (${clientName}) and fallback clients failed`);
        }
      }

      throw error;
    }
  }

  /**
   * Force switch to Qwen3
   */
  useQwen3Client(): void {
    this.useQwen3 = true;
    console.log('🔄 Switched to Qwen3 client');
  }

  /**
   * Force switch to fallback (Zephyr-7B).
   * Only warns, without changing the selection, when no fallback is registered.
   */
  useFallbackClient(): void {
    if (this.fallbackClient) {
      this.useQwen3 = false;
      console.log('🔄 Switched to fallback (Zephyr-7B) client');
    } else {
      console.warn('No fallback client available');
    }
  }

  /**
   * Get current client status.
   *
   * `usingQwen3` is the raw selection flag; `activeClient` names the client
   * that would actually serve the next request, which is the primary when the
   * fallback is selected but not registered.
   */
  getStatus(): {
    usingQwen3: boolean;
    hasFallback: boolean;
    connectionTested: boolean;
    activeClient: string;
  } {
    return {
      usingQwen3: this.useQwen3,
      hasFallback: this.fallbackClient !== null,
      connectionTested: this.connectionTested,
      activeClient: this.isPrimaryActive() ? PRIMARY_CLIENT_NAME : FALLBACK_CLIENT_NAME
    };
  }

  /**
   * Reset connection testing to allow re-initialization
   */
  resetConnectionTest(): void {
    this.connectionTested = false;
    console.log('🔄 Connection test reset - will re-test on next prediction');
  }

  /**
   * Force re-test connection and re-initialize.
   * Joins a test that is already in flight rather than starting a second one.
   */
  async retestConnection(): Promise<void> {
    this.connectionTested = false;
    await this.initialize();
  }
}

// Export singleton instance
export const textGenerationManager = new TextGenerationManager();