File size: 6,685 Bytes
24561f7 ec2b26e 24561f7 ec2b26e 24561f7 ec2b26e 24561f7 ec2b26e 24561f7 ec2b26e 24561f7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 |
/**
* Text Generation Client Manager
* Provides unified interface for text generation with automatic fallback
* Primary: Qwen3 (Qwen/Qwen3-Demo), Fallback: Zephyr-7B (Fraser/zephyr-7b)
*/
import { qwen3Client } from './qwen3Client';
/**
 * Contract implemented by every text generation backend handled in this
 * module (the primary client as well as any registered fallback).
 */
interface TextGenerationClient {
  /**
   * Optional health probe. When present, its resolved value is treated as
   * "backend is available" if truthy.
   */
  testConnection?(): Promise<boolean>;

  /**
   * Sends `params` to `endpoint` and resolves with the backend's response,
   * whose payload is exposed through the `data` array.
   */
  predict(endpoint: string, params: Array<any>): Promise<{ data: Array<any> }>;
}
/**
 * Routes text generation requests to a primary client (Qwen3) and, when that
 * client is unavailable or fails, to an optional fallback client (Zephyr-7B).
 *
 * The first `predict()` call lazily runs a connection test against the
 * primary client; the outcome decides which client later calls use.
 */
class TextGenerationManager {
  /** Upper bound (ms) for the primary client's connection test. */
  private static readonly CONNECTION_TEST_TIMEOUT_MS = 15000;
  /** Upper bound (ms) for the secondary validation request. */
  private static readonly VALIDATION_TIMEOUT_MS = 10000;

  private primaryClient: TextGenerationClient;
  private fallbackClient: TextGenerationClient | null = null;
  private useQwen3: boolean = true;
  private connectionTested: boolean = false;
  /** In-flight connection test, shared by concurrent `initialize()` callers. */
  private pendingInitialization: Promise<void> | null = null;

  constructor() {
    this.primaryClient = qwen3Client;
  }

  /**
   * Set the fallback client (Zephyr-7B).
   *
   * @param client Client used when the primary client is unavailable or fails.
   */
  setFallbackClient(client: TextGenerationClient) {
    this.fallbackClient = client;
  }

  /**
   * Test the primary connection and determine which client to use.
   *
   * Does nothing once a test has completed. Concurrent callers share a single
   * in-flight test instead of each probing the primary client. Never rejects:
   * any failure simply selects the fallback.
   */
  async initialize(): Promise<void> {
    if (this.connectionTested) return;

    const inFlight =
      this.pendingInitialization ?? (this.pendingInitialization = this.runConnectionTest());
    try {
      await inFlight;
    } finally {
      // Only clear the slot if it still holds the test we were waiting on.
      if (this.pendingInitialization === inFlight) {
        this.pendingInitialization = null;
      }
    }
  }

  /**
   * Runs the connection test (and, on success, the secondary validation) and
   * records the result in `useQwen3` / `connectionTested`.
   */
  private async runConnectionTest(): Promise<void> {
    console.log('🔍 Testing Qwen3 connection with improved verification...');
    try {
      // Clients without a health probe keep the current selection untouched.
      if (this.primaryClient.testConnection) {
        const qwen3Available = await this.withTimeout(
          this.primaryClient.testConnection(),
          TextGenerationManager.CONNECTION_TEST_TIMEOUT_MS,
          'Connection test timeout'
        );

        if (qwen3Available) {
          console.log('✅ Qwen3 client is available and will be used for text generation');
          this.useQwen3 = true;
          // Perform a secondary validation test to ensure real functionality
          await this.performSecondaryValidation();
        } else {
          console.log('⚠️ Qwen3 client is not available, falling back to Zephyr-7B');
          this.useQwen3 = false;
        }
      }
    } catch (error) {
      console.error('Failed to test Qwen3 connection:', error);
      console.log('⚠️ Falling back to Zephyr-7B due to connection error');
      this.useQwen3 = false;
    }
    this.connectionTested = true;
  }

  /**
   * Races `work` against a timer and always clears the timer afterwards, so a
   * settled race does not leave a pending timeout behind.
   *
   * @param work Promise to wait for.
   * @param ms Timeout in milliseconds.
   * @param message Message of the error thrown when the timer wins.
   * @returns The resolved value of `work`.
   * @throws Error with `message` on timeout, or whatever `work` rejects with.
   */
  private async withTimeout<T>(work: Promise<T>, ms: number, message: string): Promise<T> {
    let timer: ReturnType<typeof setTimeout> | undefined;
    const timeout = new Promise<never>((_, reject) => {
      timer = setTimeout(() => reject(new Error(message)), ms);
    });
    try {
      return await Promise.race([work, timeout]);
    } finally {
      clearTimeout(timer);
    }
  }

  /**
   * Perform secondary validation to ensure Qwen3 is actually working.
   *
   * Sends a tiny chat request and switches to the fallback when the request
   * fails, times out, or the first payload entry is not a non-empty string
   * free of the known "unavailable" markers.
   */
  private async performSecondaryValidation(): Promise<void> {
    try {
      console.log('🧪 Performing secondary validation of Qwen3 functionality...');
      // Test with a specific request that should return predictable content
      const validationResult = await this.withTimeout(
        this.primaryClient.predict('/chat', [
          'Respond with exactly this text: "VALIDATION_SUCCESS"',
          [],
          'You are a helpful assistant. Follow instructions exactly as given.',
          20, // Very small token limit
          0.1, // Low temperature for deterministic output
          0.9,
          10,
          1.0
        ]),
        TextGenerationManager.VALIDATION_TIMEOUT_MS,
        'Validation timeout'
      );

      const response: unknown = validationResult?.data?.[0] ?? '';
      const isValidResponse =
        typeof response === 'string' &&
        response.length > 0 &&
        !response.includes('temporarily unavailable') &&
        !response.includes('using_fallback');

      if (!isValidResponse) {
        console.warn('⚠️ Secondary validation failed - Qwen3 responses seem invalid, switching to fallback');
        this.useQwen3 = false;
      } else {
        console.log('✅ Secondary validation passed - Qwen3 is fully functional');
      }
    } catch (error) {
      console.warn('⚠️ Secondary validation failed with error, switching to fallback:', error);
      this.useQwen3 = false;
    }
  }

  /**
   * Get the active client for text generation.
   *
   * Falls back to the primary client (with a warning) when the fallback is
   * selected but none has been registered.
   */
  private getActiveClient(): TextGenerationClient {
    if (this.useQwen3) {
      return this.primaryClient;
    } else if (this.fallbackClient) {
      return this.fallbackClient;
    } else {
      console.warn('No fallback client available, using Qwen3 client');
      return this.primaryClient;
    }
  }

  /**
   * Display name of the client `getActiveClient()` would return. The primary
   * client is reported whenever no fallback is registered, because it is the
   * one that actually serves the request in that case.
   */
  private getActiveClientName(): string {
    return this.useQwen3 || this.fallbackClient === null ? 'Qwen3' : 'Zephyr-7B';
  }

  /**
   * Predict method with automatic fallback.
   *
   * @param endpoint Endpoint forwarded to the selected client.
   * @param params Positional parameters forwarded to the selected client.
   * @returns The response of whichever client handled the request.
   * @throws The client's own error when no fallback attempt is possible, or an
   *   Error stating that both clients failed.
   */
  async predict(endpoint: string, params: any[]): Promise<{ data: any[] }> {
    // Ensure initialization has been attempted
    if (!this.connectionTested) {
      await this.initialize();
    }

    const activeClient = this.getActiveClient();
    const clientName = this.getActiveClientName();
    console.log(`🤖 Using ${clientName} for text generation`);

    try {
      return await activeClient.predict(endpoint, params);
    } catch (error) {
      console.error(`${clientName} prediction failed:`, error);

      // If primary client fails and we have a fallback, try it
      if (this.useQwen3 && this.fallbackClient) {
        console.log('🔄 Qwen3 failed, trying fallback to Zephyr-7B...');
        try {
          const fallbackResult = await this.fallbackClient.predict(endpoint, params);
          // Mark for future calls to use fallback
          this.useQwen3 = false;
          return fallbackResult;
        } catch (fallbackError) {
          console.error('Fallback client also failed:', fallbackError);
          throw new Error(`Both primary (${clientName}) and fallback clients failed`);
        }
      }
      throw error;
    }
  }

  /**
   * Force switch to Qwen3.
   */
  useQwen3Client() {
    this.useQwen3 = true;
    console.log('🔄 Switched to Qwen3 client');
  }

  /**
   * Force switch to fallback (Zephyr-7B). Only warns when no fallback client
   * has been registered.
   */
  useFallbackClient() {
    if (this.fallbackClient) {
      this.useQwen3 = false;
      console.log('🔄 Switched to fallback (Zephyr-7B) client');
    } else {
      console.warn('No fallback client available');
    }
  }

  /**
   * Get current client status.
   *
   * @returns `usingQwen3` (the selection flag), `hasFallback`,
   *   `connectionTested`, and `activeClient` (name of the client that would
   *   serve the next request).
   */
  getStatus() {
    return {
      usingQwen3: this.useQwen3,
      hasFallback: this.fallbackClient !== null,
      connectionTested: this.connectionTested,
      activeClient: this.getActiveClientName()
    };
  }

  /**
   * Reset connection testing to allow re-initialization.
   */
  resetConnectionTest() {
    this.connectionTested = false;
    console.log('🔄 Connection test reset - will re-test on next prediction');
  }

  /**
   * Force re-test connection and re-initialize.
   */
  async retestConnection(): Promise<void> {
    this.connectionTested = false;
    await this.initialize();
  }
}
/**
 * Shared singleton instance; importers use this one manager rather than
 * constructing their own.
 */
export const textGenerationManager = new TextGenerationManager();