akhaliq HF Staff commited on
Commit
e287280
·
1 Parent(s): d142097

add ernie vl support

Browse files
Files changed (2) hide show
  1. README.md +61 -1
  2. app.py +89 -6
README.md CHANGED
@@ -93,4 +93,64 @@ The application uses:
93
  - **Hugging Face Hub**: For model inference
94
  - **ModelScope Studio**: For UI components
95
  - **OAuth Login**: Requires users to sign in with Hugging Face for code generation
96
- - **Streaming**: For real-time code generation
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  - **Hugging Face Hub**: For model inference
94
  - **ModelScope Studio**: For UI components
95
  - **OAuth Login**: Requires users to sign in with Hugging Face for code generation
96
+ - **Streaming**: For real-time code generation
97
+
98
+ # Hugging Face Coder
99
+
100
+ A Gradio-based application that uses Hugging Face models to generate code based on user requirements. The app supports both text-only and multimodal (text + image) code generation.
101
+
102
+ ## Features
103
+
104
+ - **Multiple Model Support**: DeepSeek V3, DeepSeek R1, and ERNIE-4.5-VL
105
+ - **Multimodal Input**: Upload images to help describe your requirements
106
+ - **Real-time Code Generation**: Stream responses from the models
107
+ - **Live Preview**: See your generated code in action with the built-in sandbox
108
+ - **History Management**: Keep track of your previous generations
109
+ - **Example Templates**: Quick-start with predefined application templates
110
+
111
+ ## Setup
112
+
113
+ 1. Install dependencies:
114
+ ```bash
115
+ pip install -r requirements.txt
116
+ ```
117
+
118
+ 2. Set your Hugging Face API token as an environment variable:
119
+ ```bash
120
+ export HF_TOKEN="your_huggingface_token_here"
121
+ ```
122
+
123
+ 3. Run the application:
124
+ ```bash
125
+ python app.py
126
+ ```
127
+
128
+ ## Usage
129
+
130
+ 1. **Text-only Generation**: Simply type your requirements in the text area
131
+ 2. **Multimodal Generation**: Upload an image and describe what you want to create
132
+ 3. **Model Selection**: Switch between different models using the model selector
133
+ 4. **Examples**: Use the provided example templates to get started quickly
134
+
135
+ ## Supported Models
136
+
137
+ - **DeepSeek V3**: General code generation
138
+ - **DeepSeek R1**: Advanced code generation
139
+ - **ERNIE-4.5-VL**: Multimodal code generation with image understanding
140
+
141
+ ## Environment Variables
142
+
143
+ - `HF_TOKEN`: Your Hugging Face API token (required)
144
+
145
+ ## Examples
146
+
147
+ - Todo App
148
+ - Calculator
149
+ - Weather Dashboard
150
+ - Chat Interface
151
+ - E-commerce Product Card
152
+ - Login Form
153
+ - Dashboard Layout
154
+ - Data Table
155
+ - Image Gallery
156
+ - UI from Image (multimodal)
app.py CHANGED
@@ -20,6 +20,8 @@ When asked to create an application, you should:
20
  4. Include necessary comments and documentation
21
  5. Ensure the code is functional and follows best practices
22
 
 
 
23
  Always respond with code that can be executed or rendered directly.
24
 
25
  Always output only the HTML code inside a ```html ... ``` code block, and do not include any explanations or extra text."""
@@ -35,6 +37,11 @@ AVAILABLE_MODELS = [
35
  "name": "DeepSeek R1",
36
  "id": "deepseek-ai/DeepSeek-R1-0528",
37
  "description": "DeepSeek R1 model for code generation"
 
 
 
 
 
38
  }
39
  ]
40
 
@@ -70,6 +77,14 @@ DEMO_LIST = [
70
  {
71
  "title": "Data Table",
72
  "description": "Build a data table with sorting and filtering capabilities"
 
 
 
 
 
 
 
 
73
  }
74
  ]
75
 
@@ -87,7 +102,17 @@ Messages = List[Dict[str, str]]
87
  def history_to_messages(history: History, system: str) -> Messages:
88
  messages = [{'role': 'system', 'content': system}]
89
  for h in history:
90
- messages.append({'role': 'user', 'content': h[0]})
 
 
 
 
 
 
 
 
 
 
91
  messages.append({'role': 'assistant', 'content': h[1]})
92
  return messages
93
 
@@ -95,7 +120,16 @@ def messages_to_history(messages: Messages) -> Tuple[str, History]:
95
  assert messages[0]['role'] == 'system'
96
  history = []
97
  for q, r in zip(messages[1::2], messages[2::2]):
98
- history.append([q['content'], r['content']])
 
 
 
 
 
 
 
 
 
99
  return history
100
 
101
  def remove_code_block(text):
@@ -121,6 +155,46 @@ def history_render(history: History):
121
  def clear_history():
122
  return []
123
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  def send_to_sandbox(code):
125
  # Add a wrapper to inject necessary permissions and ensure full HTML
126
  wrapped_code = f"""
@@ -207,6 +281,7 @@ with gr.Blocks(css_paths="app.css") as demo:
207
  current_model_display = gr.Markdown("**Current Model:** DeepSeek V3", visible=False)
208
  input = antd.InputTextarea(
209
  size="large", allow_clear=True, placeholder="Please enter what kind of application you want", visible=False)
 
210
  btn = antd.Button("send", type="primary", size="large", visible=False)
211
  clear_btn = antd.Button("clear history", type="default", size="large", visible=False)
212
 
@@ -215,7 +290,7 @@ with gr.Blocks(css_paths="app.css") as demo:
215
  for i, demo_item in enumerate(DEMO_LIST):
216
  with antd.Card(hoverable=True, title=demo_item["title"]) as demoCard:
217
  antd.CardMeta(description=demo_item["description"])
218
- demoCard.click(lambda e, idx=i: DEMO_LIST[idx]['description'], outputs=[input])
219
 
220
  antd.Divider("setting", visible=False)
221
  with antd.Flex(gap="small", wrap=True, visible=False) as setting_flex:
@@ -285,6 +360,7 @@ with gr.Blocks(css_paths="app.css") as demo:
285
  gr.update(visible=False),
286
  gr.update(visible=False),
287
  gr.update(visible=False),
 
288
  )
289
  else:
290
  return (
@@ -299,9 +375,10 @@ with gr.Blocks(css_paths="app.css") as demo:
299
  gr.update(visible=True),
300
  gr.update(visible=True),
301
  gr.update(visible=True),
 
302
  )
303
 
304
- def generation_code(query: Optional[str], _setting: Dict[str, str], _history: Optional[History], profile: gr.OAuthProfile | None, _current_model: Dict):
305
  if profile is None:
306
  return (
307
  "Please sign in with Hugging Face to use this feature.",
@@ -315,7 +392,12 @@ with gr.Blocks(css_paths="app.css") as demo:
315
  if _history is None:
316
  _history = []
317
  messages = history_to_messages(_history, _setting['system'])
318
- messages.append({'role': 'user', 'content': query})
 
 
 
 
 
319
 
320
  try:
321
  completion = client.chat.completions.create(
@@ -358,7 +440,7 @@ with gr.Blocks(css_paths="app.css") as demo:
358
 
359
  btn.click(
360
  generation_code,
361
- inputs=[input, setting, history, current_model],
362
  outputs=[code_output, history, sandbox, state_tab, code_drawer]
363
  )
364
 
@@ -370,6 +452,7 @@ with gr.Blocks(css_paths="app.css") as demo:
370
  outputs=[
371
  login_message,
372
  input,
 
373
  current_model_display,
374
  btn,
375
  clear_btn,
 
20
  4. Include necessary comments and documentation
21
  5. Ensure the code is functional and follows best practices
22
 
23
+ If an image is provided, analyze it and use the visual information to better understand the user's requirements.
24
+
25
  Always respond with code that can be executed or rendered directly.
26
 
27
  Always output only the HTML code inside a ```html ... ``` code block, and do not include any explanations or extra text."""
 
37
  "name": "DeepSeek R1",
38
  "id": "deepseek-ai/DeepSeek-R1-0528",
39
  "description": "DeepSeek R1 model for code generation"
40
+ },
41
+ {
42
+ "name": "ERNIE-4.5-VL",
43
+ "id": "baidu/ERNIE-4.5-VL-424B-A47B-Base-PT",
44
+ "description": "ERNIE-4.5-VL model for multimodal code generation with image support"
45
  }
46
  ]
47
 
 
77
  {
78
  "title": "Data Table",
79
  "description": "Build a data table with sorting and filtering capabilities"
80
+ },
81
+ {
82
+ "title": "Image Gallery",
83
+ "description": "Create an image gallery with lightbox functionality and responsive grid layout"
84
+ },
85
+ {
86
+ "title": "UI from Image",
87
+ "description": "Upload an image of a UI design and I'll generate the HTML/CSS code for it"
88
  }
89
  ]
90
 
 
102
def history_to_messages(history: History, system: str) -> Messages:
    """Convert the UI chat history into an OpenAI-style message list.

    The system prompt is placed first; each history entry contributes a
    user message followed by an assistant message. Multimodal user turns
    (lists of content parts) are flattened to their text fragments.
    """
    messages = [{'role': 'system', 'content': system}]
    for user_turn, assistant_turn in history:
        if isinstance(user_turn, list):
            # Multimodal turn: keep only the text parts.
            extracted = "".join(
                part.get("text", "")
                for part in user_turn
                if isinstance(part, dict) and part.get("type") == "text"
            )
            # Fall back to the raw repr when no text part was found.
            user_turn = extracted if extracted else str(user_turn)
        messages.append({'role': 'user', 'content': user_turn})
        messages.append({'role': 'assistant', 'content': assistant_turn})
    return messages
118
 
 
120
def messages_to_history(messages: Messages) -> Tuple[str, History]:
    """Rebuild [user, assistant] pair history from an OpenAI-style message list.

    The leading system message is skipped; multimodal user contents are
    flattened to their text fragments for display.
    """
    assert messages[0]['role'] == 'system'
    history = []
    user_msgs = messages[1::2]
    assistant_msgs = messages[2::2]
    for user_msg, assistant_msg in zip(user_msgs, assistant_msgs):
        content = user_msg['content']
        if isinstance(content, list):
            # Keep only the text parts of a multimodal message.
            text_only = "".join(
                part.get("text", "")
                for part in content
                if isinstance(part, dict) and part.get("type") == "text"
            )
            # Fall back to the raw repr when no text part was found.
            content = text_only if text_only else str(content)
        history.append([content, assistant_msg['content']])
    return history
134
 
135
  def remove_code_block(text):
 
155
  def clear_history():
156
  return []
157
 
158
def process_image_for_model(image):
    """Encode an uploaded image as a base64 PNG data URI for the model API.

    Returns None when no image was supplied.
    """
    if image is None:
        return None

    # Local imports keep text-only requests from touching PIL/numpy.
    import io
    import base64
    import numpy as np
    from PIL import Image

    # Gradio delivers images as numpy arrays; wrap them in a PIL Image.
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)

    png_buffer = io.BytesIO()
    image.save(png_buffer, format='PNG')
    encoded = base64.b64encode(png_buffer.getvalue()).decode()
    return f"data:image/png;base64,{encoded}"
177
+
178
def create_multimodal_message(text, image=None):
    """Build an OpenAI-style user message, attaching the image when given.

    Without an image the content is the plain text string; with one, the
    content is a [text part, image_url part] list, the image encoded as a
    data URI by process_image_for_model.
    """
    if image is None:
        # Text-only turn.
        return {"role": "user", "content": text}

    text_part = {
        "type": "text",
        "text": text
    }
    image_part = {
        "type": "image_url",
        "image_url": {
            "url": process_image_for_model(image)
        }
    }
    return {"role": "user", "content": [text_part, image_part]}
197
+
198
  def send_to_sandbox(code):
199
  # Add a wrapper to inject necessary permissions and ensure full HTML
200
  wrapped_code = f"""
 
281
  current_model_display = gr.Markdown("**Current Model:** DeepSeek V3", visible=False)
282
  input = antd.InputTextarea(
283
  size="large", allow_clear=True, placeholder="Please enter what kind of application you want", visible=False)
284
+ image_input = gr.Image(label="Upload an image (optional)", visible=False)
285
  btn = antd.Button("send", type="primary", size="large", visible=False)
286
  clear_btn = antd.Button("clear history", type="default", size="large", visible=False)
287
 
 
290
  for i, demo_item in enumerate(DEMO_LIST):
291
  with antd.Card(hoverable=True, title=demo_item["title"]) as demoCard:
292
  antd.CardMeta(description=demo_item["description"])
293
+ demoCard.click(lambda e, idx=i: (DEMO_LIST[idx]['description'], None), outputs=[input, image_input])
294
 
295
  antd.Divider("setting", visible=False)
296
  with antd.Flex(gap="small", wrap=True, visible=False) as setting_flex:
 
360
  gr.update(visible=False),
361
  gr.update(visible=False),
362
  gr.update(visible=False),
363
+ gr.update(visible=False),
364
  )
365
  else:
366
  return (
 
375
  gr.update(visible=True),
376
  gr.update(visible=True),
377
  gr.update(visible=True),
378
+ gr.update(visible=True),
379
  )
380
 
381
+ def generation_code(query: Optional[str], image: Optional[gr.Image], _setting: Dict[str, str], _history: Optional[History], profile: gr.OAuthProfile | None, _current_model: Dict):
382
  if profile is None:
383
  return (
384
  "Please sign in with Hugging Face to use this feature.",
 
392
  if _history is None:
393
  _history = []
394
  messages = history_to_messages(_history, _setting['system'])
395
+
396
+ # Create multimodal message if image is provided
397
+ if image is not None:
398
+ messages.append(create_multimodal_message(query, image))
399
+ else:
400
+ messages.append({'role': 'user', 'content': query})
401
 
402
  try:
403
  completion = client.chat.completions.create(
 
440
 
441
  btn.click(
442
  generation_code,
443
+ inputs=[input, image_input, setting, history, current_model],
444
  outputs=[code_output, history, sandbox, state_tab, code_drawer]
445
  )
446
 
 
452
  outputs=[
453
  login_message,
454
  input,
455
+ image_input,
456
  current_model_display,
457
  btn,
458
  clear_btn,