piclets / src /lib /services /textGenerationClient.ts
Fraser's picture
try fix client
ec2b26e
raw
history blame
6.69 kB
/**
* Text Generation Client Manager
* Provides unified interface for text generation with automatic fallback
* Primary: Qwen3 (Qwen/Qwen3-Demo), Fallback: Zephyr-7B (Fraser/zephyr-7b)
*/
import { qwen3Client } from './qwen3Client';
/**
 * Minimal contract every text-generation backend must satisfy so the manager
 * can treat the primary and fallback clients interchangeably.
 */
interface TextGenerationClient {
  /**
   * Optional health probe. When a client provides it, the manager awaits the
   * returned boolean to decide whether that client may be used.
   */
  testConnection?(): Promise<boolean>;

  /**
   * Sends `params` to the named `endpoint` and resolves with an object whose
   * `data` array carries the backend's output.
   */
  predict(endpoint: string, params: any[]): Promise<{ data: any[] }>;
}
/**
 * Routes text-generation requests to the primary Qwen3 client and, when the
 * primary is judged unhealthy or a request fails, to an optional fallback
 * client (Zephyr-7B) registered through `setFallbackClient`.
 *
 * Health is established lazily: the first `predict()` (or an explicit
 * `initialize()`) probes the primary client and runs one small validation
 * prediction. The outcome is cached until `resetConnectionTest()` or
 * `retestConnection()` is called.
 */
class TextGenerationManager {
  /** Upper bound, in milliseconds, on the primary client's connection probe. */
  private static readonly CONNECTION_TEST_TIMEOUT_MS = 15000;
  /** Upper bound, in milliseconds, on the follow-up validation prediction. */
  private static readonly VALIDATION_TIMEOUT_MS = 10000;

  private primaryClient: TextGenerationClient;
  private fallbackClient: TextGenerationClient | null = null;
  /** True while the primary client is the preferred target. */
  private useQwen3: boolean = true;
  /** True once a connection test has completed (whatever its outcome). */
  private connectionTested: boolean = false;
  /** In-flight connection test, shared so concurrent callers do not re-probe. */
  private initPromise: Promise<void> | null = null;

  constructor() {
    this.primaryClient = qwen3Client;
  }

  /**
   * Set the fallback client (Zephyr-7B)
   */
  setFallbackClient(client: TextGenerationClient) {
    this.fallbackClient = client;
  }

  /**
   * Test connection and determine which client to use.
   *
   * Resolves immediately when a test has already completed. Calls made while
   * a test is still running share that test instead of starting another one.
   * Never rejects: any probe failure simply marks the primary as unavailable.
   */
  async initialize(): Promise<void> {
    if (this.connectionTested) return;

    if (!this.initPromise) {
      this.initPromise = this.runConnectionTest().finally(() => {
        this.initPromise = null;
      });
    }
    return this.initPromise;
  }

  /**
   * Probes the primary client and records the result in `useQwen3`.
   * All errors (including the probe timeout) are caught and logged.
   */
  private async runConnectionTest(): Promise<void> {
    console.log('🔍 Testing Qwen3 connection with improved verification...');

    try {
      if (this.primaryClient.testConnection) {
        const qwen3Available = await this.withTimeout(
          this.primaryClient.testConnection(),
          TextGenerationManager.CONNECTION_TEST_TIMEOUT_MS,
          'Connection test timeout'
        );

        if (qwen3Available) {
          console.log('✅ Qwen3 client is available and will be used for text generation');
          this.useQwen3 = true;

          // Perform a secondary validation test to ensure real functionality
          await this.performSecondaryValidation();
        } else {
          console.log('⚠️ Qwen3 client is not available, falling back to Zephyr-7B');
          this.useQwen3 = false;
        }
      }
    } catch (error) {
      console.error('Failed to test Qwen3 connection:', error);
      console.log('⚠️ Falling back to Zephyr-7B due to connection error');
      this.useQwen3 = false;
    }

    this.connectionTested = true;
  }

  /**
   * Perform secondary validation to ensure Qwen3 is actually working.
   *
   * Sends one tiny `/chat` request and accepts any non-empty string reply that
   * does not contain the known "service degraded" markers. On an invalid
   * reply, an error, or a timeout the primary is marked as unavailable.
   */
  private async performSecondaryValidation(): Promise<void> {
    try {
      console.log('🔧 Performing secondary validation of Qwen3 functionality...');

      // Test with a specific request that should return predictable content
      const validationResult = await this.withTimeout(
        this.primaryClient.predict('/chat', [
          'Respond with exactly this text: "VALIDATION_SUCCESS"',
          [],
          'You are a helpful assistant. Follow instructions exactly as given.',
          20, // Very small token limit
          0.1, // Low temperature for deterministic output
          0.9,
          10,
          1.0
        ]),
        TextGenerationManager.VALIDATION_TIMEOUT_MS,
        'Validation timeout'
      );

      const response = validationResult?.data?.[0] || '';
      const isValidResponse =
        typeof response === 'string' &&
        response.length > 0 &&
        !response.includes('temporarily unavailable') &&
        !response.includes('using_fallback');

      if (!isValidResponse) {
        console.warn('⚠️ Secondary validation failed - Qwen3 responses seem invalid, switching to fallback');
        this.useQwen3 = false;
      } else {
        console.log('✅ Secondary validation passed - Qwen3 is fully functional');
      }
    } catch (error) {
      console.warn('⚠️ Secondary validation failed with error, switching to fallback:', error);
      this.useQwen3 = false;
    }
  }

  /**
   * Races `work` against a timer and always clears the timer afterwards, so a
   * settled race never leaves a pending timeout keeping the event loop alive.
   *
   * @param work - The operation to bound.
   * @param ms - Timeout in milliseconds.
   * @param message - Message of the `Error` used to reject on timeout.
   * @returns The value `work` resolves with.
   * @throws Whatever `work` rejects with, or `Error(message)` on timeout.
   */
  private async withTimeout<T>(work: Promise<T>, ms: number, message: string): Promise<T> {
    let timer: ReturnType<typeof setTimeout> | undefined;
    const timeout = new Promise<never>((_, reject) => {
      timer = setTimeout(() => reject(new Error(message)), ms);
    });

    try {
      return await Promise.race([work, timeout]);
    } finally {
      clearTimeout(timer);
    }
  }

  /**
   * Get the active client for text generation.
   *
   * Falls back to the primary client (with a warning) when the primary has
   * been marked unavailable but no fallback client is registered.
   */
  private getActiveClient(): TextGenerationClient {
    if (this.useQwen3) {
      return this.primaryClient;
    } else if (this.fallbackClient) {
      return this.fallbackClient;
    } else {
      console.warn('No fallback client available, using Qwen3 client');
      return this.primaryClient;
    }
  }

  /**
   * Display name of the client `getActiveClient()` would return. Without a
   * registered fallback the primary is always the one actually used.
   */
  private getActiveClientName(): string {
    return this.useQwen3 || !this.fallbackClient ? 'Qwen3' : 'Zephyr-7B';
  }

  /**
   * Predict method with automatic fallback.
   *
   * If the request was sent to the primary client and fails while a fallback
   * is registered, the same request is retried on the fallback, and later
   * calls go straight to the fallback.
   *
   * @throws The original error when no fallback attempt applies, or a new
   *   `Error` when both the primary and the fallback fail.
   */
  async predict(endpoint: string, params: any[]): Promise<{ data: any[] }> {
    // Ensure initialization has been attempted
    if (!this.connectionTested) {
      await this.initialize();
    }

    // Capture the routing decision before awaiting: a concurrent call may flip
    // `useQwen3` while this request is in flight.
    const sentToPrimary = this.useQwen3;
    const clientName = this.getActiveClientName();
    const activeClient = this.getActiveClient();
    console.log(`🤖 Using ${clientName} for text generation`);

    try {
      const result = await activeClient.predict(endpoint, params);
      return result;
    } catch (error) {
      console.error(`${clientName} prediction failed:`, error);

      // If primary client fails and we have a fallback, try it
      if (sentToPrimary && this.fallbackClient) {
        console.log('🔄 Qwen3 failed, trying fallback to Zephyr-7B...');
        try {
          const fallbackResult = await this.fallbackClient.predict(endpoint, params);
          // Mark for future calls to use fallback
          this.useQwen3 = false;
          return fallbackResult;
        } catch (fallbackError) {
          console.error('Fallback client also failed:', fallbackError);
          throw new Error(`Both primary (${clientName}) and fallback clients failed`);
        }
      }

      throw error;
    }
  }

  /**
   * Force switch to Qwen3
   */
  useQwen3Client() {
    this.useQwen3 = true;
    console.log('🔄 Switched to Qwen3 client');
  }

  /**
   * Force switch to fallback (Zephyr-7B). No-op (with a warning) when no
   * fallback client has been registered.
   */
  useFallbackClient() {
    if (this.fallbackClient) {
      this.useQwen3 = false;
      console.log('🔄 Switched to fallback (Zephyr-7B) client');
    } else {
      console.warn('No fallback client available');
    }
  }

  /**
   * Get current client status.
   *
   * `usingQwen3` is the raw preference flag; `activeClient` names the client
   * that the next request would really be sent to.
   */
  getStatus() {
    return {
      usingQwen3: this.useQwen3,
      hasFallback: this.fallbackClient !== null,
      connectionTested: this.connectionTested,
      activeClient: this.getActiveClientName()
    };
  }

  /**
   * Reset connection testing to allow re-initialization
   */
  resetConnectionTest() {
    this.connectionTested = false;
    console.log('🔄 Connection test reset - will re-test on next prediction');
  }

  /**
   * Force re-test connection and re-initialize
   */
  async retestConnection(): Promise<void> {
    this.connectionTested = false;
    await this.initialize();
  }
}
// Export singleton instance.
// It is constructed with only the Qwen3 primary client; a fallback client
// has to be registered separately via setFallbackClient().
export const textGenerationManager = new TextGenerationManager();