Spaces: Running on Zero

Update app.py
app.py CHANGED
@@ -28,9 +28,6 @@ from diffusers import StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
 from diffusers import ShapEImg2ImgPipeline, ShapEPipeline
 from diffusers.utils import export_to_ply
 
-# -----------------------------------------------------------------------------
-# Global constants and helper functions
-# -----------------------------------------------------------------------------
 
 MAX_SEED = np.iinfo(np.int32).max
 
@@ -39,10 +36,6 @@ def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
         seed = random.randint(0, MAX_SEED)
     return seed
 
-# -----------------------------------------------------------------------------
-# Model class for Text-to-3D Generation (ShapE)
-# -----------------------------------------------------------------------------
-
 class Model:
     def __init__(self):
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
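For reference, the seed helper this hunk touches reduces to the sketch below. The signature and body lines are taken from the hunk itself; the `if randomize_seed:` guard is an assumption inferred from the boolean parameter.

import random

import numpy as np

MAX_SEED = np.iinfo(np.int32).max  # largest 32-bit seed, as defined earlier in the diff

def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
    # Draw a fresh seed when the UI requests randomization; otherwise pass through.
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    return seed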
@@ -100,10 +93,6 @@ class Model:
         export_to_ply(images[0], ply_path.name)
         return self.to_glb(ply_path.name)
 
-# -----------------------------------------------------------------------------
-# Gradio UI configuration
-# -----------------------------------------------------------------------------
-
 DESCRIPTION = """
 # QwQ Edge 💬
 """
@@ -128,10 +117,6 @@ MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
-# -----------------------------------------------------------------------------
-# Load Models and Pipelines for Chat, Image, and Multimodal Processing
-# -----------------------------------------------------------------------------
-
 # Load the text-only model and tokenizer (for pure text chat)
 model_id = "prithivMLmods/FastThink-0.5B-Tiny"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
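The text-only chat model referenced here loads through the standard transformers API. A minimal sketch, assuming float16 weights and automatic device placement; the commit shows only the tokenizer line, so the model-side kwargs are illustrative:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "prithivMLmods/FastThink-0.5B-Tiny"
tokenizer = AutoTokenizer.from_pretrained(model_id)
# dtype and device_map are assumptions; this call is not visible in the diff.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    device_map="auto",
).eval()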
@@ -157,20 +142,12 @@ model_m = Qwen2VLForConditionalGeneration.from_pretrained(
     torch_dtype=torch.float16
 ).to("cuda").eval()
 
-# -----------------------------------------------------------------------------
-# Asynchronous text-to-speech
-# -----------------------------------------------------------------------------
-
 async def text_to_speech(text: str, voice: str, output_file="output.mp3"):
     """Convert text to speech using Edge TTS and save as MP3"""
     communicate = edge_tts.Communicate(text, voice)
     await communicate.save(output_file)
     return output_file
 
-# -----------------------------------------------------------------------------
-# Utility function to clean conversation history
-# -----------------------------------------------------------------------------
-
 def clean_chat_history(chat_history):
     """
     Filter out any chat entries whose "content" is not a string.
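The `text_to_speech` helper shown above can be exercised on its own. A minimal sketch, assuming the `edge-tts` package is installed; the voice name is an illustrative Edge TTS voice, not necessarily the one app.py maps to @tts1/@tts2:

import asyncio

import edge_tts

async def text_to_speech(text: str, voice: str, output_file="output.mp3"):
    """Convert text to speech using Edge TTS and save as MP3"""
    communicate = edge_tts.Communicate(text, voice)
    await communicate.save(output_file)
    return output_file

# "en-US-JennyNeural" is an example voice; app.py's actual voice mapping is not in this diff.
print(asyncio.run(text_to_speech("What causes rainbows to form?", "en-US-JennyNeural")))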
@@ -182,10 +159,6 @@ def clean_chat_history(chat_history):
             cleaned.append(msg)
     return cleaned
 
-# -----------------------------------------------------------------------------
-# Stable Diffusion XL Pipeline for Image Generation
-# -----------------------------------------------------------------------------
-
 MODEL_ID_SD = os.getenv("MODEL_VAL_PATH")  # SDXL Model repository path via env variable
 MAX_IMAGE_SIZE = int(os.getenv("MAX_IMAGE_SIZE", "4096"))
 USE_TORCH_COMPILE = os.getenv("USE_TORCH_COMPILE", "0") == "1"
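Only the first and last lines of `clean_chat_history` appear in these hunks; a plausible completion is sketched below. The docstring, `cleaned.append(msg)`, and `return cleaned` come from the diff, while the membership test is an assumption:

def clean_chat_history(chat_history):
    """
    Filter out any chat entries whose "content" is not a string.
    """
    cleaned = []
    for msg in chat_history:
        # Assumed check: keep only dict entries whose content is plain text.
        if isinstance(msg, dict) and isinstance(msg.get("content"), str):
            cleaned.append(msg)
    return cleaned

# Example: a non-string audio entry is dropped, plain text survives.
history = [
    {"role": "user", "content": "hi"},
    {"role": "assistant", "content": ("audio", "output.mp3")},
]
assert clean_chat_history(history) == [{"role": "user", "content": "hi"}]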
@@ -263,9 +236,6 @@ def generate_image_fn(
     image_paths = [save_image(img) for img in images]
     return image_paths, seed
 
-# -----------------------------------------------------------------------------
-# Text-to-3D Generation using the ShapE Pipeline
-# -----------------------------------------------------------------------------
 
 @spaces.GPU(duration=120, enable_queue=True)
 def generate_3d_fn(
@@ -284,10 +254,6 @@ def generate_3d_fn(
     glb_path = model3d.run_text(prompt, seed=seed, guidance_scale=guidance_scale, num_steps=num_steps)
     return glb_path, seed
 
-# -----------------------------------------------------------------------------
-# Chat Generation Function with support for @tts, @image, and @3d commands
-# -----------------------------------------------------------------------------
-
 @spaces.GPU
 def generate(
     input_dict: dict,
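The `model3d.run_text` call above wraps diffusers' ShapE text-to-3D pipeline, whose imports appear at the top of this diff. A standalone sketch following the public ShapEPipeline API; the checkpoint id, prompt, and parameter values are assumptions, not read from app.py:

import torch
from diffusers import ShapEPipeline
from diffusers.utils import export_to_ply

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
pipe = ShapEPipeline.from_pretrained("openai/shap-e").to(device)

generator = torch.Generator(device=device).manual_seed(0)
# output_type="mesh" yields meshes that export_to_ply can serialize.
images = pipe(
    "A birthday cupcake with cherry",
    generator=generator,
    guidance_scale=15.0,
    num_inference_steps=64,
    frame_size=256,
    output_type="mesh",
).images
export_to_ply(images[0], "cupcake.ply")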
@@ -420,10 +386,6 @@ def generate(
         output_file = asyncio.run(text_to_speech(final_response, voice))
         yield gr.Audio(output_file, autoplay=True)
 
-# -----------------------------------------------------------------------------
-# Gradio Chat Interface Setup and Launch
-# -----------------------------------------------------------------------------
-
 demo = gr.ChatInterface(
     fn=generate,
     additional_inputs=[
@@ -435,10 +397,9 @@ demo = gr.ChatInterface(
     ],
     examples=[
         ["@tts1 Who is Nikola Tesla, and why did he die?"],
-        [
+        ["@3d A birthday cupcake with cherry"],
         [{"text": "summarize the letter", "files": ["examples/1.png"]}],
         ["@image Chocolate dripping from a donut against a yellow background, in the style of brocore, hyper-realistic"],
-        ["@3d A futuristic city skyline in the style of cyberpunk"],
         ["Write a Python function to check if a number is prime."],
         ["@tts2 What causes rainbows to form?"],
     ],
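The refreshed example rows double as documentation for the chat commands. How generate() branches on those prefixes is not visible in this commit; the sketch below is a hypothetical illustration of that routing, with branch names matching the functions defined earlier in the file:

def route_command(text: str) -> str:
    # Hypothetical prefix dispatch; app.py's real branching is not shown in this diff.
    lowered = text.strip().lower()
    if lowered.startswith("@3d"):
        return "3d"      # handled by generate_3d_fn
    if lowered.startswith("@image"):
        return "image"   # handled by generate_image_fn
    if lowered.startswith(("@tts1", "@tts2")):
        return "tts"     # handled by text_to_speech
    return "chat"

assert route_command("@3d A birthday cupcake with cherry") == "3d"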