zephyr only
Browse files
CLAUDE.md
CHANGED
@@ -85,9 +85,8 @@ const client = await window.gradioClient.Client.connect("space-name");
|
|
85 |
|
86 |
**Current Gradio Connections:**
|
87 |
- **Flux Image Generation**: `Fraser/flux`
|
88 |
-
- **Joy Caption**: `fancyfeast/joy-caption-
|
89 |
-
- **Zephyr-7B Text Generation**: `Fraser/zephyr-7b`
|
90 |
-
- **Qwen3 Text Generation**: `Qwen/Qwen3-Demo` (primary)
|
91 |
|
92 |
**Build Notes:**
|
93 |
- DO NOT install Gradio Client via npm (`npm install @gradio/client`) - it causes build failures
|
@@ -95,10 +94,10 @@ const client = await window.gradioClient.Client.connect("space-name");
|
|
95 |
- All Gradio connections should use the established pattern from App.svelte
|
96 |
|
97 |
### Text Generation Architecture
|
98 |
-
The project uses a smart fallback system:
|
99 |
-
1. **
|
100 |
-
2. **
|
101 |
-
3. **
|
102 |
|
103 |
## Troubleshooting
|
104 |
|
@@ -108,13 +107,13 @@ The project uses a smart fallback system:
|
|
108 |
- **Missing dependencies**: Run `npm install` if packages are missing
|
109 |
|
110 |
### Monster Generation Issues
|
111 |
-
- **Name extraction problems**: Check `MonsterGenerator.svelte`
|
112 |
-
- **
|
113 |
- **Image processing errors**: Verify Flux and Joy Caption clients are properly connected
|
114 |
|
115 |
### Performance
|
116 |
- **Large image files**: Consider image compression before upload
|
117 |
-
- **Slow generation**:
|
118 |
- **Battle lag**: IndexedDB operations are async - ensure proper await usage
|
119 |
|
120 |
## Important Notes
|
|
|
85 |
|
86 |
**Current Gradio Connections:**
|
87 |
- **Flux Image Generation**: `Fraser/flux`
|
88 |
+
- **Joy Caption**: `fancyfeast/joy-caption-alpha-two`
|
89 |
+
- **Zephyr-7B Text Generation**: `Fraser/zephyr-7b`
|
|
|
90 |
|
91 |
**Build Notes:**
|
92 |
- DO NOT install Gradio Client via npm (`npm install @gradio/client`) - it causes build failures
|
|
|
94 |
- All Gradio connections should use the established pattern from App.svelte
|
95 |
|
96 |
### Text Generation Architecture
|
97 |
+
The project uses a simple, direct approach:
|
98 |
+
1. **Zephyr-7B**: Direct connection to `Fraser/zephyr-7b` space for all text generation
|
99 |
+
2. **Direct API calls**: Components use `zephyrClient.predict("/chat", [...])` directly
|
100 |
+
3. **No fallback complexity**: Simple, reliable single-client architecture
|
101 |
|
102 |
## Troubleshooting
|
103 |
|
|
|
107 |
- **Missing dependencies**: Run `npm install` if packages are missing
|
108 |
|
109 |
### Monster Generation Issues
|
110 |
+
- **Name extraction problems**: Check `MonsterGenerator.svelte` - regex should extract content after `# Monster Name`
|
111 |
+
- **Zephyr-7B connection failures**: Verify `Fraser/zephyr-7b` space is accessible
|
112 |
- **Image processing errors**: Verify Flux and Joy Caption clients are properly connected
|
113 |
|
114 |
### Performance
|
115 |
- **Large image files**: Consider image compression before upload
|
116 |
+
- **Slow generation**: Zephyr-7B may take 10-30 seconds for complex monster concepts
|
117 |
- **Battle lag**: IndexedDB operations are async - ensure proper await usage
|
118 |
|
119 |
## Important Notes
|
src/lib/components/MonsterGenerator/MonsterGenerator.svelte
CHANGED
@@ -9,20 +9,11 @@
|
|
9 |
import { extractPicletMetadata } from '$lib/services/picletMetadata';
|
10 |
import { savePicletInstance } from '$lib/db/piclets';
|
11 |
import { PicletType, TYPE_DATA } from '$lib/types/picletTypes';
|
12 |
-
import { textGenerationManager } from '$lib/services/textGenerationClient';
|
13 |
|
14 |
interface Props extends MonsterGeneratorProps {}
|
15 |
|
16 |
let { joyCaptionClient, zephyrClient, fluxClient }: Props = $props();
|
17 |
|
18 |
-
// Initialize text generation manager with Zephyr-7B fallback support
|
19 |
-
$effect(() => {
|
20 |
-
if (zephyrClient) {
|
21 |
-
textGenerationManager.setFallbackClient(zephyrClient);
|
22 |
-
textGenerationManager.initialize();
|
23 |
-
}
|
24 |
-
});
|
25 |
-
|
26 |
let state: MonsterWorkflowState = $state({
|
27 |
currentStep: 'upload',
|
28 |
userImage: null,
|
@@ -228,7 +219,7 @@ Focus on: colors, body shape, eyes, limbs, mouth, and key visual features. Omit
|
|
228 |
console.log('Using smart text generation for visual description extraction');
|
229 |
|
230 |
try {
|
231 |
-
const output = await
|
232 |
promptGenerationPrompt, // message
|
233 |
[], // chat_history
|
234 |
systemPrompt, // system_prompt
|
@@ -391,7 +382,7 @@ Write your response within \`\`\`json\`\`\``;
|
|
391 |
console.log('Generating monster stats from concept');
|
392 |
|
393 |
try {
|
394 |
-
const output = await
|
395 |
statsPrompt, // message
|
396 |
[], // chat_history
|
397 |
systemPrompt, // system_prompt
|
|
|
9 |
import { extractPicletMetadata } from '$lib/services/picletMetadata';
|
10 |
import { savePicletInstance } from '$lib/db/piclets';
|
11 |
import { PicletType, TYPE_DATA } from '$lib/types/picletTypes';
|
|
|
12 |
|
13 |
interface Props extends MonsterGeneratorProps {}
|
14 |
|
15 |
let { joyCaptionClient, zephyrClient, fluxClient }: Props = $props();
|
16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
let state: MonsterWorkflowState = $state({
|
18 |
currentStep: 'upload',
|
19 |
userImage: null,
|
|
|
219 |
console.log('Using smart text generation for visual description extraction');
|
220 |
|
221 |
try {
|
222 |
+
const output = await zephyrClient!.predict("/chat", [
|
223 |
promptGenerationPrompt, // message
|
224 |
[], // chat_history
|
225 |
systemPrompt, // system_prompt
|
|
|
382 |
console.log('Generating monster stats from concept');
|
383 |
|
384 |
try {
|
385 |
+
const output = await zephyrClient!.predict("/chat", [
|
386 |
statsPrompt, // message
|
387 |
[], // chat_history
|
388 |
systemPrompt, // system_prompt
|
src/lib/services/qwen3Client.ts
DELETED
@@ -1,239 +0,0 @@
|
|
1 |
-
/**
|
2 |
-
* Qwen3 Client - Drop-in replacement for rwkvClient using Qwen3 HF Space
|
3 |
-
* Compatible with existing rwkvClient.predict("/chat", [...]) API
|
4 |
-
* Uses proper Gradio Client connection instead of direct HTTP calls
|
5 |
-
*/
|
6 |
-
|
7 |
-
interface Qwen3Message {
|
8 |
-
role: 'user' | 'assistant' | 'system';
|
9 |
-
content: string;
|
10 |
-
}
|
11 |
-
|
12 |
-
interface Qwen3ClientOptions {
|
13 |
-
huggingFaceSpace: string;
|
14 |
-
model: string;
|
15 |
-
apiKey?: string;
|
16 |
-
}
|
17 |
-
|
18 |
-
export class Qwen3Client {
|
19 |
-
private options: Qwen3ClientOptions;
|
20 |
-
private sessionId: string;
|
21 |
-
private gradioClient: any = null;
|
22 |
-
|
23 |
-
constructor(options: Partial<Qwen3ClientOptions> = {}) {
|
24 |
-
this.options = {
|
25 |
-
huggingFaceSpace: 'Qwen/Qwen3-Demo',
|
26 |
-
model: 'qwen2.5-72b-instruct', // Use Qwen2.5-72B for best performance
|
27 |
-
...options
|
28 |
-
};
|
29 |
-
this.sessionId = this.generateSessionId();
|
30 |
-
}
|
31 |
-
|
32 |
-
private generateSessionId(): string {
|
33 |
-
return Math.random().toString(36).substring(2, 15) + Math.random().toString(36).substring(2, 15);
|
34 |
-
}
|
35 |
-
|
36 |
-
/**
|
37 |
-
* Initialize Gradio Client connection to Qwen3 Space
|
38 |
-
*/
|
39 |
-
private async initializeGradioClient(): Promise<void> {
|
40 |
-
if (this.gradioClient) {
|
41 |
-
return; // Already initialized
|
42 |
-
}
|
43 |
-
|
44 |
-
try {
|
45 |
-
// Use the same approach as App.svelte - access window.gradioClient
|
46 |
-
if (!window.gradioClient?.Client) {
|
47 |
-
throw new Error('Gradio Client not available - ensure App.svelte has loaded the client');
|
48 |
-
}
|
49 |
-
|
50 |
-
console.log(`🔗 Connecting to ${this.options.huggingFaceSpace}...`);
|
51 |
-
this.gradioClient = await window.gradioClient.Client.connect(this.options.huggingFaceSpace);
|
52 |
-
|
53 |
-
console.log(`✅ Connected to Qwen3 space: ${this.options.huggingFaceSpace}`);
|
54 |
-
} catch (error) {
|
55 |
-
console.error('Failed to initialize Qwen3 Gradio Client:', error);
|
56 |
-
throw new Error(`Could not connect to Qwen3 space: ${error}`);
|
57 |
-
}
|
58 |
-
}
|
59 |
-
|
60 |
-
/**
|
61 |
-
* Predict method that mimics rwkvClient.predict("/chat", [...]) API
|
62 |
-
* @param endpoint Should be "/chat" for compatibility
|
63 |
-
* @param params Array of parameters: [message, chat_history, system_prompt, max_new_tokens, temperature, top_p, top_k, repetition_penalty]
|
64 |
-
* @returns Promise<{data: any[]}>
|
65 |
-
*/
|
66 |
-
async predict(endpoint: string, params: any[]): Promise<{data: any[]}> {
|
67 |
-
if (endpoint !== '/chat') {
|
68 |
-
throw new Error('Qwen3Client only supports "/chat" endpoint');
|
69 |
-
}
|
70 |
-
|
71 |
-
// Note: Qwen3-Demo only uses these 3 parameters from the rwkv-compatible API
|
72 |
-
const [
|
73 |
-
message,
|
74 |
-
chat_history = [],
|
75 |
-
system_prompt = "You are a helpful assistant."
|
76 |
-
] = params;
|
77 |
-
|
78 |
-
try {
|
79 |
-
// Ensure Gradio client is initialized
|
80 |
-
await this.initializeGradioClient();
|
81 |
-
|
82 |
-
// Use the proper Gradio Client API to call the add_message function
|
83 |
-
// Only pass parameters that actually exist in the Qwen3 Gradio app
|
84 |
-
const response = await this.callQwen3API(message, {
|
85 |
-
sys_prompt: system_prompt,
|
86 |
-
model: this.options.model
|
87 |
-
});
|
88 |
-
|
89 |
-
// Return in the expected format: {data: [response_text]}
|
90 |
-
return {
|
91 |
-
data: [response]
|
92 |
-
};
|
93 |
-
|
94 |
-
} catch (error) {
|
95 |
-
console.error('Qwen3Client error:', error);
|
96 |
-
throw new Error(`Qwen3 API call failed: ${error}`);
|
97 |
-
}
|
98 |
-
}
|
99 |
-
|
100 |
-
private async callQwen3API(message: string, options: any): Promise<string> {
|
101 |
-
try {
|
102 |
-
if (!this.gradioClient) {
|
103 |
-
throw new Error('Gradio client not initialized');
|
104 |
-
}
|
105 |
-
|
106 |
-
// Prepare settings for the Qwen3 space based on actual app.py structure
|
107 |
-
// Only use parameters that actually exist in the Gradio app
|
108 |
-
const settingsFormValue = {
|
109 |
-
model: options.model || this.options.model,
|
110 |
-
sys_prompt: options.sys_prompt || "You are a helpful assistant.",
|
111 |
-
thinking_budget: 38 // Use maximum thinking budget for best quality
|
112 |
-
};
|
113 |
-
|
114 |
-
// Thinking button state - disable for faster responses
|
115 |
-
const thinkingBtnState = {
|
116 |
-
enable_thinking: false
|
117 |
-
};
|
118 |
-
|
119 |
-
// Initial state for the conversation
|
120 |
-
const stateValue = {
|
121 |
-
conversation_contexts: {},
|
122 |
-
conversations: [],
|
123 |
-
conversation_id: this.sessionId
|
124 |
-
};
|
125 |
-
|
126 |
-
console.log(`🤖 Calling Qwen3 add_message with: "${message.substring(0, 50)}..."`);
|
127 |
-
|
128 |
-
// Call the add_message function from the Gradio app
|
129 |
-
// Based on app.py line 170: add_message(input_value, settings_form_value, thinking_btn_state_value, state_value)
|
130 |
-
const result = await this.gradioClient.predict("/add_message", [
|
131 |
-
message, // input_value
|
132 |
-
settingsFormValue, // settings_form_value
|
133 |
-
thinkingBtnState, // thinking_btn_state_value
|
134 |
-
stateValue // state_value
|
135 |
-
]);
|
136 |
-
|
137 |
-
console.log('🔍 Raw Qwen3 response:', result);
|
138 |
-
|
139 |
-
// Extract the response text from the Gradio result
|
140 |
-
if (result && result.data && Array.isArray(result.data)) {
|
141 |
-
// The response format should include the chatbot data
|
142 |
-
// Look for the chatbot component data (usually index 2 or 3)
|
143 |
-
for (let i = 0; i < result.data.length; i++) {
|
144 |
-
const item = result.data[i];
|
145 |
-
if (Array.isArray(item) && item.length > 0) {
|
146 |
-
// Look for the last assistant message
|
147 |
-
const lastMessage = item[item.length - 1];
|
148 |
-
if (lastMessage && lastMessage.role === 'assistant' && lastMessage.content) {
|
149 |
-
// Extract text content from the structured content
|
150 |
-
if (Array.isArray(lastMessage.content)) {
|
151 |
-
for (const contentItem of lastMessage.content) {
|
152 |
-
if (contentItem.type === 'text' && contentItem.content) {
|
153 |
-
console.log('✅ Extracted Qwen3 response:', contentItem.content.substring(0, 100) + '...');
|
154 |
-
return contentItem.content;
|
155 |
-
}
|
156 |
-
}
|
157 |
-
} else if (typeof lastMessage.content === 'string') {
|
158 |
-
console.log('✅ Extracted Qwen3 response:', lastMessage.content.substring(0, 100) + '...');
|
159 |
-
return lastMessage.content;
|
160 |
-
}
|
161 |
-
}
|
162 |
-
}
|
163 |
-
}
|
164 |
-
}
|
165 |
-
|
166 |
-
// If we can't extract the response, throw an error to trigger fallback
|
167 |
-
throw new Error('Could not extract text response from Qwen3 API result');
|
168 |
-
|
169 |
-
} catch (error) {
|
170 |
-
console.warn('Qwen3 Gradio API call failed, using fallback strategy:', error);
|
171 |
-
|
172 |
-
// Development fallback: Generate a reasonable response based on the input
|
173 |
-
// If it's a JSON generation request, provide a structured response
|
174 |
-
if (message.includes('JSON') || message.includes('json') || options.sys_prompt?.includes('JSON')) {
|
175 |
-
if (message.includes('monster') || message.includes('stats')) {
|
176 |
-
return this.generateFallbackMonsterStats(message);
|
177 |
-
}
|
178 |
-
return '```json\n{"status": "Qwen3 temporarily unavailable", "using_fallback": true}\n```';
|
179 |
-
}
|
180 |
-
|
181 |
-
// For text generation, provide a reasonable response
|
182 |
-
if (message.includes('visual description') || message.includes('image generation')) {
|
183 |
-
return this.generateFallbackImageDescription(message);
|
184 |
-
}
|
185 |
-
|
186 |
-
return `I understand you're asking about: "${message.substring(0, 100)}..."\n\nHowever, I'm currently unable to connect to the Qwen3 service. The system will automatically fall back to an alternative model for your request.`;
|
187 |
-
}
|
188 |
-
}
|
189 |
-
|
190 |
-
private generateFallbackMonsterStats(userMessage: string): string {
|
191 |
-
// Extract key information from the user message to generate reasonable stats
|
192 |
-
const isRare = userMessage.toLowerCase().includes('rare') || userMessage.toLowerCase().includes('legendary');
|
193 |
-
const isCommon = userMessage.toLowerCase().includes('common') || userMessage.toLowerCase().includes('basic');
|
194 |
-
|
195 |
-
let baseStats = isRare ? 70 : isCommon ? 25 : 45;
|
196 |
-
let variation = isRare ? 25 : isCommon ? 15 : 20;
|
197 |
-
|
198 |
-
const stats = {
|
199 |
-
rarity: isRare ? 'rare' : isCommon ? 'common' : 'uncommon',
|
200 |
-
picletType: 'beast', // Default fallback
|
201 |
-
height: Math.round((Math.random() * 3 + 0.5) * 10) / 10,
|
202 |
-
weight: Math.round((Math.random() * 100 + 10) * 10) / 10,
|
203 |
-
HP: Math.round(Math.max(10, Math.min(100, baseStats + Math.random() * variation - variation/2))),
|
204 |
-
defence: Math.round(Math.max(10, Math.min(100, baseStats + Math.random() * variation - variation/2))),
|
205 |
-
attack: Math.round(Math.max(10, Math.min(100, baseStats + Math.random() * variation - variation/2))),
|
206 |
-
speed: Math.round(Math.max(10, Math.min(100, baseStats + Math.random() * variation - variation/2))),
|
207 |
-
monsterLore: "A mysterious creature discovered through advanced AI analysis. Its true nature remains to be studied.",
|
208 |
-
specialPassiveTraitDescription: "Adaptive Resilience - This creature adapts to its environment.",
|
209 |
-
attackActionName: "Strike",
|
210 |
-
attackActionDescription: "A focused attack that deals moderate damage.",
|
211 |
-
buffActionName: "Focus",
|
212 |
-
buffActionDescription: "Increases concentration, boosting attack power temporarily.",
|
213 |
-
debuffActionName: "Intimidate",
|
214 |
-
debuffActionDescription: "Reduces the opponent's confidence, lowering their attack.",
|
215 |
-
specialActionName: "Signature Move",
|
216 |
-
specialActionDescription: "A powerful technique unique to this creature."
|
217 |
-
};
|
218 |
-
|
219 |
-
return '```json\n' + JSON.stringify(stats, null, 2) + '\n```';
|
220 |
-
}
|
221 |
-
|
222 |
-
private generateFallbackImageDescription(userMessage: string): string {
|
223 |
-
// Generate a basic visual description based on common elements
|
224 |
-
const colors = ['vibrant blue', 'emerald green', 'golden yellow', 'deep purple', 'crimson red'];
|
225 |
-
const features = ['large expressive eyes', 'sleek form', 'distinctive markings', 'graceful limbs'];
|
226 |
-
|
227 |
-
const color = colors[Math.floor(Math.random() * colors.length)];
|
228 |
-
const feature = features[Math.floor(Math.random() * features.length)];
|
229 |
-
|
230 |
-
return `A ${color} creature with ${feature}, designed in an anime-inspired style with clean lines and appealing proportions.`;
|
231 |
-
}
|
232 |
-
|
233 |
-
/**
|
234 |
-
* No connection testing - let natural failures trigger fallback to Zephyr-7B
|
235 |
-
*/
|
236 |
-
}
|
237 |
-
|
238 |
-
// Export a default instance
|
239 |
-
export const qwen3Client = new Qwen3Client();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/lib/services/textGenerationClient.ts
DELETED
@@ -1,149 +0,0 @@
|
|
1 |
-
/**
|
2 |
-
* Text Generation Client Manager
|
3 |
-
* Provides unified interface for text generation with automatic fallback
|
4 |
-
* Primary: Qwen3 (Qwen/Qwen3-Demo), Fallback: Zephyr-7B (Fraser/zephyr-7b)
|
5 |
-
*/
|
6 |
-
|
7 |
-
import { qwen3Client } from './qwen3Client';
|
8 |
-
|
9 |
-
interface TextGenerationClient {
|
10 |
-
predict(endpoint: string, params: any[]): Promise<{data: any[]}>;
|
11 |
-
}
|
12 |
-
|
13 |
-
class TextGenerationManager {
|
14 |
-
private primaryClient: TextGenerationClient;
|
15 |
-
private fallbackClient: TextGenerationClient | null = null;
|
16 |
-
private useQwen3: boolean = true;
|
17 |
-
private connectionTested: boolean = false;
|
18 |
-
|
19 |
-
constructor() {
|
20 |
-
this.primaryClient = qwen3Client;
|
21 |
-
}
|
22 |
-
|
23 |
-
/**
|
24 |
-
* Set the fallback client (Zephyr-7B)
|
25 |
-
*/
|
26 |
-
setFallbackClient(client: TextGenerationClient) {
|
27 |
-
this.fallbackClient = client;
|
28 |
-
}
|
29 |
-
|
30 |
-
/**
|
31 |
-
* Initialize without testing - assume Qwen3 is available and test on first real use
|
32 |
-
*/
|
33 |
-
async initialize(): Promise<void> {
|
34 |
-
if (this.connectionTested) return;
|
35 |
-
|
36 |
-
console.log('🔧 Initializing text generation manager - using Qwen3 but will fallback to Zephyr-7B on failure');
|
37 |
-
|
38 |
-
// Default to using Qwen3, test will happen on first predict() call
|
39 |
-
this.useQwen3 = true;
|
40 |
-
this.connectionTested = true;
|
41 |
-
|
42 |
-
console.log('✅ Text generation manager initialized - ready to use Qwen3 (with fallback to Zephyr-7B)');
|
43 |
-
}
|
44 |
-
|
45 |
-
|
46 |
-
/**
|
47 |
-
* Get the active client for text generation
|
48 |
-
*/
|
49 |
-
private getActiveClient(): TextGenerationClient {
|
50 |
-
if (this.useQwen3) {
|
51 |
-
return this.primaryClient;
|
52 |
-
} else if (this.fallbackClient) {
|
53 |
-
return this.fallbackClient;
|
54 |
-
} else {
|
55 |
-
console.warn('No fallback client available, using Qwen3 client');
|
56 |
-
return this.primaryClient;
|
57 |
-
}
|
58 |
-
}
|
59 |
-
|
60 |
-
/**
|
61 |
-
* Predict method with automatic fallback - tests on first failure
|
62 |
-
*/
|
63 |
-
async predict(endpoint: string, params: any[]): Promise<{data: any[]}> {
|
64 |
-
// Ensure initialization has been attempted
|
65 |
-
if (!this.connectionTested) {
|
66 |
-
await this.initialize();
|
67 |
-
}
|
68 |
-
|
69 |
-
const activeClient = this.getActiveClient();
|
70 |
-
const clientName = this.useQwen3 ? 'Qwen3' : 'Zephyr-7B';
|
71 |
-
|
72 |
-
console.log(`🤖 Using ${clientName} for text generation`);
|
73 |
-
|
74 |
-
try {
|
75 |
-
const result = await activeClient.predict(endpoint, params);
|
76 |
-
return result;
|
77 |
-
} catch (error) {
|
78 |
-
console.error(`${clientName} prediction failed:`, error);
|
79 |
-
|
80 |
-
// If primary client fails and we have a fallback, try it
|
81 |
-
if (this.useQwen3 && this.fallbackClient) {
|
82 |
-
console.log('🔄 Qwen3 failed, switching to fallback Zephyr-7B...');
|
83 |
-
try {
|
84 |
-
const fallbackResult = await this.fallbackClient.predict(endpoint, params);
|
85 |
-
// Mark for future calls to use fallback
|
86 |
-
this.useQwen3 = false;
|
87 |
-
console.log('✅ Fallback to Zephyr-7B successful - will use Zephyr-7B for future requests');
|
88 |
-
return fallbackResult;
|
89 |
-
} catch (fallbackError) {
|
90 |
-
console.error('Fallback client also failed:', fallbackError);
|
91 |
-
throw new Error(`Both primary (${clientName}) and fallback clients failed`);
|
92 |
-
}
|
93 |
-
}
|
94 |
-
|
95 |
-
throw error;
|
96 |
-
}
|
97 |
-
}
|
98 |
-
|
99 |
-
/**
|
100 |
-
* Force switch to Qwen3
|
101 |
-
*/
|
102 |
-
useQwen3Client() {
|
103 |
-
this.useQwen3 = true;
|
104 |
-
console.log('🔄 Switched to Qwen3 client');
|
105 |
-
}
|
106 |
-
|
107 |
-
/**
|
108 |
-
* Force switch to fallback (Zephyr-7B)
|
109 |
-
*/
|
110 |
-
useFallbackClient() {
|
111 |
-
if (this.fallbackClient) {
|
112 |
-
this.useQwen3 = false;
|
113 |
-
console.log('🔄 Switched to fallback (Zephyr-7B) client');
|
114 |
-
} else {
|
115 |
-
console.warn('No fallback client available');
|
116 |
-
}
|
117 |
-
}
|
118 |
-
|
119 |
-
/**
|
120 |
-
* Get current client status
|
121 |
-
*/
|
122 |
-
getStatus() {
|
123 |
-
return {
|
124 |
-
usingQwen3: this.useQwen3,
|
125 |
-
hasFallback: this.fallbackClient !== null,
|
126 |
-
connectionTested: this.connectionTested,
|
127 |
-
activeClient: this.useQwen3 ? 'Qwen3' : 'Zephyr-7B'
|
128 |
-
};
|
129 |
-
}
|
130 |
-
|
131 |
-
/**
|
132 |
-
* Reset connection testing to allow re-initialization
|
133 |
-
*/
|
134 |
-
resetConnectionTest() {
|
135 |
-
this.connectionTested = false;
|
136 |
-
console.log('🔄 Connection test reset - will re-test on next prediction');
|
137 |
-
}
|
138 |
-
|
139 |
-
/**
|
140 |
-
* Force re-test connection and re-initialize
|
141 |
-
*/
|
142 |
-
async retestConnection(): Promise<void> {
|
143 |
-
this.connectionTested = false;
|
144 |
-
await this.initialize();
|
145 |
-
}
|
146 |
-
}
|
147 |
-
|
148 |
-
// Export singleton instance
|
149 |
-
export const textGenerationManager = new TextGenerationManager();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|