prithivMLmods committed on
Commit 6a44e02 · verified · 1 Parent(s): b0ba3ed

Update app.py

Files changed (1): app.py (+1 -40)
app.py CHANGED
@@ -28,9 +28,6 @@ from diffusers import StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
 from diffusers import ShapEImg2ImgPipeline, ShapEPipeline
 from diffusers.utils import export_to_ply
 
-# -----------------------------------------------------------------------------
-# Global constants and helper functions
-# -----------------------------------------------------------------------------
 
 MAX_SEED = np.iinfo(np.int32).max
 
@@ -39,10 +36,6 @@ def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
         seed = random.randint(0, MAX_SEED)
     return seed
 
-# -----------------------------------------------------------------------------
-# Model class for Text-to-3D Generation (ShapE)
-# -----------------------------------------------------------------------------
-
 class Model:
     def __init__(self):
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
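The seed helper this hunk touches is fully visible between the hunk header and the context lines; as a self-contained sketch:

```python
import random

import numpy as np

MAX_SEED = np.iinfo(np.int32).max  # largest 32-bit signed int, 2**31 - 1

def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
    # Swap in a random seed when the UI checkbox asks for one;
    # otherwise pass the user-supplied seed through unchanged.
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    return seed
```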
@@ -100,10 +93,6 @@ class Model:
         export_to_ply(images[0], ply_path.name)
         return self.to_glb(ply_path.name)
 
-# -----------------------------------------------------------------------------
-# Gradio UI configuration
-# -----------------------------------------------------------------------------
-
 DESCRIPTION = """
 # QwQ Edge 💬
 """
@@ -128,10 +117,6 @@ MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
-# -----------------------------------------------------------------------------
-# Load Models and Pipelines for Chat, Image, and Multimodal Processing
-# -----------------------------------------------------------------------------
-
 # Load the text-only model and tokenizer (for pure text chat)
 model_id = "prithivMLmods/FastThink-0.5B-Tiny"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
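A minimal sketch of the text-only model load shown here; the dtype and device placement beyond what the hunk shows are assumptions:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "prithivMLmods/FastThink-0.5B-Tiny"
tokenizer = AutoTokenizer.from_pretrained(model_id)
# device_map and dtype are assumed defaults, not taken from this diff.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.bfloat16,
).eval()
```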
@@ -157,20 +142,12 @@ model_m = Qwen2VLForConditionalGeneration.from_pretrained(
     torch_dtype=torch.float16
 ).to("cuda").eval()
 
-# -----------------------------------------------------------------------------
-# Asynchronous text-to-speech
-# -----------------------------------------------------------------------------
-
 async def text_to_speech(text: str, voice: str, output_file="output.mp3"):
     """Convert text to speech using Edge TTS and save as MP3"""
     communicate = edge_tts.Communicate(text, voice)
     await communicate.save(output_file)
     return output_file
 
-# -----------------------------------------------------------------------------
-# Utility function to clean conversation history
-# -----------------------------------------------------------------------------
-
 def clean_chat_history(chat_history):
     """
     Filter out any chat entries whose "content" is not a string.
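The TTS helper in this hunk is complete as shown. A standalone usage sketch, driving the coroutine from synchronous code the same way app.py does with `asyncio.run()` (the voice name here is an assumption; any voice listed by `edge-tts --list-voices` works):

```python
import asyncio

import edge_tts

async def text_to_speech(text: str, voice: str, output_file="output.mp3"):
    """Convert text to speech using Edge TTS and save as MP3"""
    communicate = edge_tts.Communicate(text, voice)
    await communicate.save(output_file)
    return output_file

if __name__ == "__main__":
    path = asyncio.run(text_to_speech("Hello from QwQ Edge", "en-US-JennyNeural"))
    print(path)  # output.mp3
```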
@@ -182,10 +159,6 @@ def clean_chat_history(chat_history):
             cleaned.append(msg)
     return cleaned
 
-# -----------------------------------------------------------------------------
-# Stable Diffusion XL Pipeline for Image Generation
-# -----------------------------------------------------------------------------
-
 MODEL_ID_SD = os.getenv("MODEL_VAL_PATH")  # SDXL Model repository path via env variable
 MAX_IMAGE_SIZE = int(os.getenv("MAX_IMAGE_SIZE", "4096"))
 USE_TORCH_COMPILE = os.getenv("USE_TORCH_COMPILE", "0") == "1"
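These env variables gate the SDXL pipeline load that follows in the unchanged part of the file. A hedged sketch of that continuation, mirroring the `StableDiffusionXLPipeline` import at the top of app.py (the exact options in the committed file may differ):

```python
import os

import torch
from diffusers import EulerAncestralDiscreteScheduler, StableDiffusionXLPipeline

MODEL_ID_SD = os.getenv("MODEL_VAL_PATH")  # SDXL repo id, supplied via env

if MODEL_ID_SD:
    # fp16 weights and the Euler-Ancestral scheduler are assumed choices here.
    sd_pipe = StableDiffusionXLPipeline.from_pretrained(
        MODEL_ID_SD,
        torch_dtype=torch.float16,
        use_safetensors=True,
    ).to("cuda" if torch.cuda.is_available() else "cpu")
    sd_pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(
        sd_pipe.scheduler.config)
```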
@@ -263,9 +236,6 @@ def generate_image_fn(
     image_paths = [save_image(img) for img in images]
     return image_paths, seed
 
-# -----------------------------------------------------------------------------
-# Text-to-3D Generation using the ShapE Pipeline
-# -----------------------------------------------------------------------------
 
 @spaces.GPU(duration=120, enable_queue=True)
 def generate_3d_fn(
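Combining this hunk's head with the tail visible in the next hunk, the 3D entry point plausibly reads as follows. The parameter defaults are assumptions, and `model3d` (a `Model` instance) and `randomize_seed_fn` come from earlier in app.py:

```python
@spaces.GPU(duration=120, enable_queue=True)
def generate_3d_fn(prompt: str,
                   seed: int = 1,            # assumed default
                   guidance_scale: float = 15.0,  # assumed default
                   num_steps: int = 64,      # assumed default
                   randomize_seed: bool = False):
    # Resolve the seed, run ShapE text-to-3D, and hand back a GLB path.
    seed = randomize_seed_fn(seed, randomize_seed)
    glb_path = model3d.run_text(prompt, seed=seed,
                                guidance_scale=guidance_scale,
                                num_steps=num_steps)
    return glb_path, seed
```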
@@ -284,10 +254,6 @@ def generate_3d_fn(
     glb_path = model3d.run_text(prompt, seed=seed, guidance_scale=guidance_scale, num_steps=num_steps)
     return glb_path, seed
 
-# -----------------------------------------------------------------------------
-# Chat Generation Function with support for @tts, @image, and @3d commands
-# -----------------------------------------------------------------------------
-
 @spaces.GPU
 def generate(
     input_dict: dict,
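The removed banner names the `@tts`, `@image`, and `@3d` prefix commands that `generate` dispatches on. The exact parsing in app.py is not shown in this diff; a minimal sketch of the convention:

```python
def route_command(text: str) -> tuple[str, str]:
    """Return (handler_name, remaining_prompt) for a chat message.

    Illustrative only; handler names match the functions in app.py,
    but the real dispatch logic in `generate` may differ.
    """
    t = text.strip()
    for prefix, handler in (("@tts1", "text_to_speech"),
                            ("@tts2", "text_to_speech"),
                            ("@image", "generate_image_fn"),
                            ("@3d", "generate_3d_fn")):
        if t.lower().startswith(prefix):
            return handler, t[len(prefix):].strip()
    return "chat", t

assert route_command("@3d A birthday cupcake with cherry") == (
    "generate_3d_fn", "A birthday cupcake with cherry")
```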
@@ -420,10 +386,6 @@ def generate(
         output_file = asyncio.run(text_to_speech(final_response, voice))
         yield gr.Audio(output_file, autoplay=True)
 
-# -----------------------------------------------------------------------------
-# Gradio Chat Interface Setup and Launch
-# -----------------------------------------------------------------------------
-
 demo = gr.ChatInterface(
     fn=generate,
     additional_inputs=[
@@ -435,10 +397,9 @@ demo = gr.ChatInterface(
     ],
     examples=[
         ["@tts1 Who is Nikola Tesla, and why did he die?"],
-        [{"text": "Extract JSON from the image", "files": ["examples/document.jpg"]}],
+        ["@3d A birthday cupcake with cherry"],
         [{"text": "summarize the letter", "files": ["examples/1.png"]}],
         ["@image Chocolate dripping from a donut against a yellow background, in the style of brocore, hyper-realistic"],
-        ["@3d A futuristic city skyline in the style of cyberpunk"],
         ["Write a Python function to check if a number is prime."],
         ["@tts2 What causes rainbows to form?"],
     ],
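The examples list mixes plain strings with `{"text": ..., "files": [...]}` dicts, which Gradio accepts when the ChatInterface is multimodal. A hedged, minimal reproduction of the wiring after this commit, with a stand-in echo function in place of app.py's `generate`:

```python
import gradio as gr

def echo(message, history):
    # Multimodal messages arrive as dicts with "text" and "files" keys.
    text = message["text"] if isinstance(message, dict) else message
    return f"You said: {text}"

demo = gr.ChatInterface(
    fn=echo,
    multimodal=True,  # enables dict examples with "text" and "files"
    examples=[
        ["@tts1 Who is Nikola Tesla, and why did he die?"],
        ["@3d A birthday cupcake with cherry"],
        [{"text": "summarize the letter", "files": ["examples/1.png"]}],
    ],
)

if __name__ == "__main__":
    demo.launch()
```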
 