Fraser committed on
Commit
6a18e94
·
1 Parent(s): 3eb1d35

zephyr only

Browse files
CLAUDE.md CHANGED
@@ -85,9 +85,8 @@ const client = await window.gradioClient.Client.connect("space-name");
85
 
86
  **Current Gradio Connections:**
87
  - **Flux Image Generation**: `Fraser/flux`
88
- - **Joy Caption**: `fancyfeast/joy-caption-pre-alpha`
89
- - **Zephyr-7B Text Generation**: `Fraser/zephyr-7b` (fallback)
90
- - **Qwen3 Text Generation**: `Qwen/Qwen3-Demo` (primary)
91
 
92
  **Build Notes:**
93
  - DO NOT install Gradio Client via npm (`npm install @gradio/client`) - it causes build failures
@@ -95,10 +94,10 @@ const client = await window.gradioClient.Client.connect("space-name");
95
  - All Gradio connections should use the established pattern from App.svelte
96
 
97
  ### Text Generation Architecture
98
- The project uses a smart fallback system:
99
- 1. **Primary**: Qwen3 via proper Gradio Client connection to `/add_message` endpoint
100
- 2. **Fallback**: Zephyr-7B for when Qwen3 is unavailable
101
- 3. **Manager**: `textGenerationManager` handles automatic switching with connection testing
102
 
103
  ## Troubleshooting
104
 
@@ -108,13 +107,13 @@ The project uses a smart fallback system:
108
  - **Missing dependencies**: Run `npm install` if packages are missing
109
 
110
  ### Monster Generation Issues
111
- - **Name extraction problems**: Check `MonsterGenerator.svelte` line 322 - regex should extract content after `# Monster Name`
112
- - **Qwen3 connection failures**: System automatically falls back to Zephyr-7B if Qwen3 is unavailable
113
  - **Image processing errors**: Verify Flux and Joy Caption clients are properly connected
114
 
115
  ### Performance
116
  - **Large image files**: Consider image compression before upload
117
- - **Slow generation**: Qwen3 may take 10-30 seconds for complex monster concepts
118
  - **Battle lag**: IndexedDB operations are async - ensure proper await usage
119
 
120
  ## Important Notes
 
85
 
86
  **Current Gradio Connections:**
87
  - **Flux Image Generation**: `Fraser/flux`
88
+ - **Joy Caption**: `fancyfeast/joy-caption-alpha-two`
89
+ - **Zephyr-7B Text Generation**: `Fraser/zephyr-7b`
 
90
 
91
  **Build Notes:**
92
  - DO NOT install Gradio Client via npm (`npm install @gradio/client`) - it causes build failures
 
94
  - All Gradio connections should use the established pattern from App.svelte
95
 
96
  ### Text Generation Architecture
97
+ The project uses a simple, direct approach:
98
+ 1. **Zephyr-7B**: Direct connection to `Fraser/zephyr-7b` space for all text generation
99
+ 2. **Direct API calls**: Components use `zephyrClient.predict("/chat", [...])` directly
100
+ 3. **No fallback complexity**: Simple, reliable single-client architecture
101
 
102
  ## Troubleshooting
103
 
 
107
  - **Missing dependencies**: Run `npm install` if packages are missing
108
 
109
  ### Monster Generation Issues
110
+ - **Name extraction problems**: Check `MonsterGenerator.svelte` - regex should extract content after `# Monster Name`
111
+ - **Zephyr-7B connection failures**: Verify `Fraser/zephyr-7b` space is accessible
112
  - **Image processing errors**: Verify Flux and Joy Caption clients are properly connected
113
 
114
  ### Performance
115
  - **Large image files**: Consider image compression before upload
116
+ - **Slow generation**: Zephyr-7B may take 10-30 seconds for complex monster concepts
117
  - **Battle lag**: IndexedDB operations are async - ensure proper await usage
118
 
119
  ## Important Notes
src/lib/components/MonsterGenerator/MonsterGenerator.svelte CHANGED
@@ -9,20 +9,11 @@
9
  import { extractPicletMetadata } from '$lib/services/picletMetadata';
10
  import { savePicletInstance } from '$lib/db/piclets';
11
  import { PicletType, TYPE_DATA } from '$lib/types/picletTypes';
12
- import { textGenerationManager } from '$lib/services/textGenerationClient';
13
 
14
  interface Props extends MonsterGeneratorProps {}
15
 
16
  let { joyCaptionClient, zephyrClient, fluxClient }: Props = $props();
17
 
18
- // Initialize text generation manager with Zephyr-7B fallback support
19
- $effect(() => {
20
- if (zephyrClient) {
21
- textGenerationManager.setFallbackClient(zephyrClient);
22
- textGenerationManager.initialize();
23
- }
24
- });
25
-
26
  let state: MonsterWorkflowState = $state({
27
  currentStep: 'upload',
28
  userImage: null,
@@ -228,7 +219,7 @@ Focus on: colors, body shape, eyes, limbs, mouth, and key visual features. Omit
228
  console.log('Using smart text generation for visual description extraction');
229
 
230
  try {
231
- const output = await textGenerationManager.predict("/chat", [
232
  promptGenerationPrompt, // message
233
  [], // chat_history
234
  systemPrompt, // system_prompt
@@ -391,7 +382,7 @@ Write your response within \`\`\`json\`\`\``;
391
  console.log('Generating monster stats from concept');
392
 
393
  try {
394
- const output = await textGenerationManager.predict("/chat", [
395
  statsPrompt, // message
396
  [], // chat_history
397
  systemPrompt, // system_prompt
 
9
  import { extractPicletMetadata } from '$lib/services/picletMetadata';
10
  import { savePicletInstance } from '$lib/db/piclets';
11
  import { PicletType, TYPE_DATA } from '$lib/types/picletTypes';
 
12
 
13
  interface Props extends MonsterGeneratorProps {}
14
 
15
  let { joyCaptionClient, zephyrClient, fluxClient }: Props = $props();
16
 
 
 
 
 
 
 
 
 
17
  let state: MonsterWorkflowState = $state({
18
  currentStep: 'upload',
19
  userImage: null,
 
219
  console.log('Using smart text generation for visual description extraction');
220
 
221
  try {
222
+ const output = await zephyrClient!.predict("/chat", [
223
  promptGenerationPrompt, // message
224
  [], // chat_history
225
  systemPrompt, // system_prompt
 
382
  console.log('Generating monster stats from concept');
383
 
384
  try {
385
+ const output = await zephyrClient!.predict("/chat", [
386
  statsPrompt, // message
387
  [], // chat_history
388
  systemPrompt, // system_prompt
src/lib/services/qwen3Client.ts DELETED
@@ -1,239 +0,0 @@
1
- /**
2
- * Qwen3 Client - Drop-in replacement for rwkvClient using Qwen3 HF Space
3
- * Compatible with existing rwkvClient.predict("/chat", [...]) API
4
- * Uses proper Gradio Client connection instead of direct HTTP calls
5
- */
6
-
7
- interface Qwen3Message {
8
- role: 'user' | 'assistant' | 'system';
9
- content: string;
10
- }
11
-
12
- interface Qwen3ClientOptions {
13
- huggingFaceSpace: string;
14
- model: string;
15
- apiKey?: string;
16
- }
17
-
18
- export class Qwen3Client {
19
- private options: Qwen3ClientOptions;
20
- private sessionId: string;
21
- private gradioClient: any = null;
22
-
23
- constructor(options: Partial<Qwen3ClientOptions> = {}) {
24
- this.options = {
25
- huggingFaceSpace: 'Qwen/Qwen3-Demo',
26
- model: 'qwen2.5-72b-instruct', // Use Qwen2.5-72B for best performance
27
- ...options
28
- };
29
- this.sessionId = this.generateSessionId();
30
- }
31
-
32
- private generateSessionId(): string {
33
- return Math.random().toString(36).substring(2, 15) + Math.random().toString(36).substring(2, 15);
34
- }
35
-
36
- /**
37
- * Initialize Gradio Client connection to Qwen3 Space
38
- */
39
- private async initializeGradioClient(): Promise<void> {
40
- if (this.gradioClient) {
41
- return; // Already initialized
42
- }
43
-
44
- try {
45
- // Use the same approach as App.svelte - access window.gradioClient
46
- if (!window.gradioClient?.Client) {
47
- throw new Error('Gradio Client not available - ensure App.svelte has loaded the client');
48
- }
49
-
50
- console.log(`🔗 Connecting to ${this.options.huggingFaceSpace}...`);
51
- this.gradioClient = await window.gradioClient.Client.connect(this.options.huggingFaceSpace);
52
-
53
- console.log(`✅ Connected to Qwen3 space: ${this.options.huggingFaceSpace}`);
54
- } catch (error) {
55
- console.error('Failed to initialize Qwen3 Gradio Client:', error);
56
- throw new Error(`Could not connect to Qwen3 space: ${error}`);
57
- }
58
- }
59
-
60
- /**
61
- * Predict method that mimics rwkvClient.predict("/chat", [...]) API
62
- * @param endpoint Should be "/chat" for compatibility
63
- * @param params Array of parameters: [message, chat_history, system_prompt, max_new_tokens, temperature, top_p, top_k, repetition_penalty]
64
- * @returns Promise<{data: any[]}>
65
- */
66
- async predict(endpoint: string, params: any[]): Promise<{data: any[]}> {
67
- if (endpoint !== '/chat') {
68
- throw new Error('Qwen3Client only supports "/chat" endpoint');
69
- }
70
-
71
- // Note: Qwen3-Demo only uses these 3 parameters from the rwkv-compatible API
72
- const [
73
- message,
74
- chat_history = [],
75
- system_prompt = "You are a helpful assistant."
76
- ] = params;
77
-
78
- try {
79
- // Ensure Gradio client is initialized
80
- await this.initializeGradioClient();
81
-
82
- // Use the proper Gradio Client API to call the add_message function
83
- // Only pass parameters that actually exist in the Qwen3 Gradio app
84
- const response = await this.callQwen3API(message, {
85
- sys_prompt: system_prompt,
86
- model: this.options.model
87
- });
88
-
89
- // Return in the expected format: {data: [response_text]}
90
- return {
91
- data: [response]
92
- };
93
-
94
- } catch (error) {
95
- console.error('Qwen3Client error:', error);
96
- throw new Error(`Qwen3 API call failed: ${error}`);
97
- }
98
- }
99
-
100
- private async callQwen3API(message: string, options: any): Promise<string> {
101
- try {
102
- if (!this.gradioClient) {
103
- throw new Error('Gradio client not initialized');
104
- }
105
-
106
- // Prepare settings for the Qwen3 space based on actual app.py structure
107
- // Only use parameters that actually exist in the Gradio app
108
- const settingsFormValue = {
109
- model: options.model || this.options.model,
110
- sys_prompt: options.sys_prompt || "You are a helpful assistant.",
111
- thinking_budget: 38 // Use maximum thinking budget for best quality
112
- };
113
-
114
- // Thinking button state - disable for faster responses
115
- const thinkingBtnState = {
116
- enable_thinking: false
117
- };
118
-
119
- // Initial state for the conversation
120
- const stateValue = {
121
- conversation_contexts: {},
122
- conversations: [],
123
- conversation_id: this.sessionId
124
- };
125
-
126
- console.log(`🤖 Calling Qwen3 add_message with: "${message.substring(0, 50)}..."`);
127
-
128
- // Call the add_message function from the Gradio app
129
- // Based on app.py line 170: add_message(input_value, settings_form_value, thinking_btn_state_value, state_value)
130
- const result = await this.gradioClient.predict("/add_message", [
131
- message, // input_value
132
- settingsFormValue, // settings_form_value
133
- thinkingBtnState, // thinking_btn_state_value
134
- stateValue // state_value
135
- ]);
136
-
137
- console.log('🔍 Raw Qwen3 response:', result);
138
-
139
- // Extract the response text from the Gradio result
140
- if (result && result.data && Array.isArray(result.data)) {
141
- // The response format should include the chatbot data
142
- // Look for the chatbot component data (usually index 2 or 3)
143
- for (let i = 0; i < result.data.length; i++) {
144
- const item = result.data[i];
145
- if (Array.isArray(item) && item.length > 0) {
146
- // Look for the last assistant message
147
- const lastMessage = item[item.length - 1];
148
- if (lastMessage && lastMessage.role === 'assistant' && lastMessage.content) {
149
- // Extract text content from the structured content
150
- if (Array.isArray(lastMessage.content)) {
151
- for (const contentItem of lastMessage.content) {
152
- if (contentItem.type === 'text' && contentItem.content) {
153
- console.log('✅ Extracted Qwen3 response:', contentItem.content.substring(0, 100) + '...');
154
- return contentItem.content;
155
- }
156
- }
157
- } else if (typeof lastMessage.content === 'string') {
158
- console.log('✅ Extracted Qwen3 response:', lastMessage.content.substring(0, 100) + '...');
159
- return lastMessage.content;
160
- }
161
- }
162
- }
163
- }
164
- }
165
-
166
- // If we can't extract the response, throw an error to trigger fallback
167
- throw new Error('Could not extract text response from Qwen3 API result');
168
-
169
- } catch (error) {
170
- console.warn('Qwen3 Gradio API call failed, using fallback strategy:', error);
171
-
172
- // Development fallback: Generate a reasonable response based on the input
173
- // If it's a JSON generation request, provide a structured response
174
- if (message.includes('JSON') || message.includes('json') || options.sys_prompt?.includes('JSON')) {
175
- if (message.includes('monster') || message.includes('stats')) {
176
- return this.generateFallbackMonsterStats(message);
177
- }
178
- return '```json\n{"status": "Qwen3 temporarily unavailable", "using_fallback": true}\n```';
179
- }
180
-
181
- // For text generation, provide a reasonable response
182
- if (message.includes('visual description') || message.includes('image generation')) {
183
- return this.generateFallbackImageDescription(message);
184
- }
185
-
186
- return `I understand you're asking about: "${message.substring(0, 100)}..."\n\nHowever, I'm currently unable to connect to the Qwen3 service. The system will automatically fall back to an alternative model for your request.`;
187
- }
188
- }
189
-
190
- private generateFallbackMonsterStats(userMessage: string): string {
191
- // Extract key information from the user message to generate reasonable stats
192
- const isRare = userMessage.toLowerCase().includes('rare') || userMessage.toLowerCase().includes('legendary');
193
- const isCommon = userMessage.toLowerCase().includes('common') || userMessage.toLowerCase().includes('basic');
194
-
195
- let baseStats = isRare ? 70 : isCommon ? 25 : 45;
196
- let variation = isRare ? 25 : isCommon ? 15 : 20;
197
-
198
- const stats = {
199
- rarity: isRare ? 'rare' : isCommon ? 'common' : 'uncommon',
200
- picletType: 'beast', // Default fallback
201
- height: Math.round((Math.random() * 3 + 0.5) * 10) / 10,
202
- weight: Math.round((Math.random() * 100 + 10) * 10) / 10,
203
- HP: Math.round(Math.max(10, Math.min(100, baseStats + Math.random() * variation - variation/2))),
204
- defence: Math.round(Math.max(10, Math.min(100, baseStats + Math.random() * variation - variation/2))),
205
- attack: Math.round(Math.max(10, Math.min(100, baseStats + Math.random() * variation - variation/2))),
206
- speed: Math.round(Math.max(10, Math.min(100, baseStats + Math.random() * variation - variation/2))),
207
- monsterLore: "A mysterious creature discovered through advanced AI analysis. Its true nature remains to be studied.",
208
- specialPassiveTraitDescription: "Adaptive Resilience - This creature adapts to its environment.",
209
- attackActionName: "Strike",
210
- attackActionDescription: "A focused attack that deals moderate damage.",
211
- buffActionName: "Focus",
212
- buffActionDescription: "Increases concentration, boosting attack power temporarily.",
213
- debuffActionName: "Intimidate",
214
- debuffActionDescription: "Reduces the opponent's confidence, lowering their attack.",
215
- specialActionName: "Signature Move",
216
- specialActionDescription: "A powerful technique unique to this creature."
217
- };
218
-
219
- return '```json\n' + JSON.stringify(stats, null, 2) + '\n```';
220
- }
221
-
222
- private generateFallbackImageDescription(userMessage: string): string {
223
- // Generate a basic visual description based on common elements
224
- const colors = ['vibrant blue', 'emerald green', 'golden yellow', 'deep purple', 'crimson red'];
225
- const features = ['large expressive eyes', 'sleek form', 'distinctive markings', 'graceful limbs'];
226
-
227
- const color = colors[Math.floor(Math.random() * colors.length)];
228
- const feature = features[Math.floor(Math.random() * features.length)];
229
-
230
- return `A ${color} creature with ${feature}, designed in an anime-inspired style with clean lines and appealing proportions.`;
231
- }
232
-
233
- /**
234
- * No connection testing - let natural failures trigger fallback to Zephyr-7B
235
- */
236
- }
237
-
238
- // Export a default instance
239
- export const qwen3Client = new Qwen3Client();
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/lib/services/textGenerationClient.ts DELETED
@@ -1,149 +0,0 @@
1
- /**
2
- * Text Generation Client Manager
3
- * Provides unified interface for text generation with automatic fallback
4
- * Primary: Qwen3 (Qwen/Qwen3-Demo), Fallback: Zephyr-7B (Fraser/zephyr-7b)
5
- */
6
-
7
- import { qwen3Client } from './qwen3Client';
8
-
9
- interface TextGenerationClient {
10
- predict(endpoint: string, params: any[]): Promise<{data: any[]}>;
11
- }
12
-
13
- class TextGenerationManager {
14
- private primaryClient: TextGenerationClient;
15
- private fallbackClient: TextGenerationClient | null = null;
16
- private useQwen3: boolean = true;
17
- private connectionTested: boolean = false;
18
-
19
- constructor() {
20
- this.primaryClient = qwen3Client;
21
- }
22
-
23
- /**
24
- * Set the fallback client (Zephyr-7B)
25
- */
26
- setFallbackClient(client: TextGenerationClient) {
27
- this.fallbackClient = client;
28
- }
29
-
30
- /**
31
- * Initialize without testing - assume Qwen3 is available and test on first real use
32
- */
33
- async initialize(): Promise<void> {
34
- if (this.connectionTested) return;
35
-
36
- console.log('🔧 Initializing text generation manager - using Qwen3 but will fallback to Zephyr-7B on failure');
37
-
38
- // Default to using Qwen3, test will happen on first predict() call
39
- this.useQwen3 = true;
40
- this.connectionTested = true;
41
-
42
- console.log('✅ Text generation manager initialized - ready to use Qwen3 (with fallback to Zephyr-7B)');
43
- }
44
-
45
-
46
- /**
47
- * Get the active client for text generation
48
- */
49
- private getActiveClient(): TextGenerationClient {
50
- if (this.useQwen3) {
51
- return this.primaryClient;
52
- } else if (this.fallbackClient) {
53
- return this.fallbackClient;
54
- } else {
55
- console.warn('No fallback client available, using Qwen3 client');
56
- return this.primaryClient;
57
- }
58
- }
59
-
60
- /**
61
- * Predict method with automatic fallback - tests on first failure
62
- */
63
- async predict(endpoint: string, params: any[]): Promise<{data: any[]}> {
64
- // Ensure initialization has been attempted
65
- if (!this.connectionTested) {
66
- await this.initialize();
67
- }
68
-
69
- const activeClient = this.getActiveClient();
70
- const clientName = this.useQwen3 ? 'Qwen3' : 'Zephyr-7B';
71
-
72
- console.log(`🤖 Using ${clientName} for text generation`);
73
-
74
- try {
75
- const result = await activeClient.predict(endpoint, params);
76
- return result;
77
- } catch (error) {
78
- console.error(`${clientName} prediction failed:`, error);
79
-
80
- // If primary client fails and we have a fallback, try it
81
- if (this.useQwen3 && this.fallbackClient) {
82
- console.log('🔄 Qwen3 failed, switching to fallback Zephyr-7B...');
83
- try {
84
- const fallbackResult = await this.fallbackClient.predict(endpoint, params);
85
- // Mark for future calls to use fallback
86
- this.useQwen3 = false;
87
- console.log('✅ Fallback to Zephyr-7B successful - will use Zephyr-7B for future requests');
88
- return fallbackResult;
89
- } catch (fallbackError) {
90
- console.error('Fallback client also failed:', fallbackError);
91
- throw new Error(`Both primary (${clientName}) and fallback clients failed`);
92
- }
93
- }
94
-
95
- throw error;
96
- }
97
- }
98
-
99
- /**
100
- * Force switch to Qwen3
101
- */
102
- useQwen3Client() {
103
- this.useQwen3 = true;
104
- console.log('🔄 Switched to Qwen3 client');
105
- }
106
-
107
- /**
108
- * Force switch to fallback (Zephyr-7B)
109
- */
110
- useFallbackClient() {
111
- if (this.fallbackClient) {
112
- this.useQwen3 = false;
113
- console.log('🔄 Switched to fallback (Zephyr-7B) client');
114
- } else {
115
- console.warn('No fallback client available');
116
- }
117
- }
118
-
119
- /**
120
- * Get current client status
121
- */
122
- getStatus() {
123
- return {
124
- usingQwen3: this.useQwen3,
125
- hasFallback: this.fallbackClient !== null,
126
- connectionTested: this.connectionTested,
127
- activeClient: this.useQwen3 ? 'Qwen3' : 'Zephyr-7B'
128
- };
129
- }
130
-
131
- /**
132
- * Reset connection testing to allow re-initialization
133
- */
134
- resetConnectionTest() {
135
- this.connectionTested = false;
136
- console.log('🔄 Connection test reset - will re-test on next prediction');
137
- }
138
-
139
- /**
140
- * Force re-test connection and re-initialize
141
- */
142
- async retestConnection(): Promise<void> {
143
- this.connectionTested = false;
144
- await this.initialize();
145
- }
146
- }
147
-
148
- // Export singleton instance
149
- export const textGenerationManager = new TextGenerationManager();