geyik1 committed on
Commit
82d06f9
·
verified ·
1 Parent(s): 92c4fcc

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +214 -0
  2. requirements.txt +42 -0
app.py ADDED
@@ -0,0 +1,214 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import gradio as gr
3
+ from diffusers import ShapEPipeline, ShapEImg2ImgPipeline
4
+ from diffusers.utils import export_to_gif
5
+ import os
6
+ from huggingface_hub import HfApi, login
7
+ from PIL import Image
8
+ import numpy as np
9
+ import gc
10
+
11
# Force CPU usage
device = "cpu"
# Cap PyTorch's intra-op thread pool at 4, but never request more threads than
# the host reports: asking for 4 threads on a 1-2 core machine oversubscribes
# the CPU and slows inference down. os.cpu_count() may return None, hence the
# fallback to a single thread.
torch.set_num_threads(max(1, min(4, os.cpu_count() or 1)))
print(f"Using device: {device}")
15
+
16
def validate_token(token):
    """Return True if *token* is accepted by the Hugging Face Hub.

    The token is checked with ``HfApi().whoami`` rather than ``login`` so that
    a visitor's token is only verified, not stored as the credential of the
    whole process (this app receives tokens from arbitrary users).

    Args:
        token: The access token typed into the UI. ``None``, non-string and
            blank values are rejected without contacting the Hub.

    Returns:
        bool: True when the Hub accepted the token, False otherwise. Failures
        are printed and never raised, so callers can simply branch on the
        result.
    """
    # Reject missing/blank tokens up front: no network round-trip, and no risk
    # of a library falling back to an interactive prompt on a headless server.
    if not isinstance(token, str) or not token.strip():
        print("Token validation error: no token provided")
        return False

    try:
        HfApi().whoami(token=token)
        return True
    except Exception as e:
        # Boundary handler: any failure (bad token, network error) means the
        # token could not be validated.
        print(f"Token validation error: {str(e)}")
        return False
23
+
24
def generate_3d_from_text(prompt, token, guidance_scale=7.0, export_format="obj", progress=gr.Progress()):
    """Generate a 3D model from a text prompt with the Shap-E pipeline.

    Two files are written under ``outputs/``: a GIF preview and the mesh in
    ``export_format``. Both pipeline runs share the same random seed so the
    preview shows the same object that is exported.

    Args:
        prompt: Text description of the object to generate.
        token: Hugging Face access token; checked with ``validate_token`` and
            passed to ``from_pretrained``.
        guidance_scale: Guidance scale; values above 10.0 are clamped to 10.0.
        export_format: File extension of the exported mesh (the UI offers
            ``"obj"`` and ``"glb"``).
        progress: Gradio progress tracker (injected by Gradio).

    Returns:
        A 3-tuple ``(status, preview, model_file)`` for the three Gradio
        outputs. On failure the status carries the error message and the other
        two entries are ``None``; exceptions are reported, not raised.
    """
    # Imported locally so this function is self-contained with respect to the
    # module-level import block.
    from diffusers.utils import export_to_obj, export_to_ply

    pipe = output = mesh_output = None
    try:
        if not prompt or not prompt.strip():
            return gr.update(value="Please enter a description for the 3D model"), None, None

        if not validate_token(token):
            return gr.update(value="Invalid Hugging Face token"), None, None

        print(f"Starting generation: {prompt}")
        progress(0.1, desc="Loading model...")

        pipe = ShapEPipeline.from_pretrained(
            "openai/shap-e",
            torch_dtype=torch.float32,
            token=token,
            revision="main",
            low_cpu_mem_usage=True
        )

        os.makedirs("outputs", exist_ok=True)
        safe_prompt = "".join(ch for ch in prompt if ch.isalnum() or ch in (" ", "-", "_"))
        # Truncate so long prompts cannot exceed filesystem name limits, and
        # fall back to a fixed stem when nothing usable is left of the prompt.
        stem = safe_prompt[:64].strip() or "model"
        base_filename = f"outputs/{stem}"

        try:
            scale = min(guidance_scale, 10.0)
            # One seed for both runs: re-creating the generator with the same
            # seed makes the mesh run reproduce the latents of the preview run.
            seed = int.from_bytes(os.urandom(4), "little")

            progress(0.3, desc="Creating 3D model...")
            with torch.no_grad():
                output = pipe(
                    prompt,
                    guidance_scale=scale,
                    num_inference_steps=16,
                    generator=torch.Generator().manual_seed(seed)
                )

            progress(0.5, desc="Creating GIF...")
            # `images` holds one list of frames per prompt; the GIF exporter
            # expects the frame list itself.
            gif_path = export_to_gif(output.images[0], f"{base_filename}.gif")

            progress(0.7, desc="Creating 3D mesh...")
            with torch.no_grad():
                mesh_output = pipe(
                    prompt,
                    guidance_scale=scale,
                    num_inference_steps=16,
                    generator=torch.Generator().manual_seed(seed),
                    output_type="mesh"
                )
            # With output_type="mesh" the decoded meshes are returned in
            # `.images` (there is no `.meshes` attribute on the output).
            mesh = mesh_output.images[0]

            progress(0.9, desc="Saving files...")
            output_path = f"{base_filename}.{export_format}"
            if export_format == "obj":
                export_to_obj(mesh, output_path)
            else:
                # diffusers only writes PLY/OBJ; other formats (e.g. GLB) are
                # produced by converting an intermediate PLY with trimesh,
                # which is pinned in this project's requirements.txt.
                import trimesh

                ply_path = export_to_ply(mesh, f"{base_filename}.ply")
                trimesh.load(ply_path).export(output_path, file_type=export_format)

            print(f"Generation completed: {output_path}")
            progress(1.0, desc="Completed!")
            return gr.update(value="Generation successful!"), gr.update(value=gif_path), gr.update(value=output_path)

        except Exception as model_error:
            error_msg = f"Model execution error: {str(model_error)}"
            print(error_msg)
            return gr.update(value=error_msg), None, None

    except Exception as e:
        error_msg = f"General error: {str(e)}"
        print(error_msg)
        return gr.update(value=error_msg), None, None

    finally:
        # Drop the pipeline and its outputs on every path (success or error)
        # so the model weights do not stay resident between requests.
        pipe = output = mesh_output = None
        gc.collect()
86
+
87
def generate_3d_from_image(image, token, guidance_scale=7.0, export_format="obj", progress=gr.Progress()):
    """Generate a 3D model from an input image with the Shap-E img2img pipeline.

    Two files are written under ``outputs/``: a GIF preview and the mesh in
    ``export_format``. Both pipeline runs share the same random seed so the
    preview shows the same object that is exported.

    Args:
        image: Source image as a PIL image, a file path, or a NumPy array.
        token: Hugging Face access token; checked with ``validate_token`` and
            passed to ``from_pretrained``.
        guidance_scale: Guidance scale; values above 10.0 are clamped to 10.0.
        export_format: File extension of the exported mesh (the UI offers
            ``"obj"`` and ``"glb"``).
        progress: Gradio progress tracker (injected by Gradio).

    Returns:
        A 3-tuple ``(status, preview, model_file)`` for the three Gradio
        outputs. On failure the status carries the error message and the other
        two entries are ``None``; exceptions are reported, not raised.
    """
    # Imported locally so this function is self-contained with respect to the
    # module-level import block.
    import time

    from diffusers.utils import export_to_obj, export_to_ply

    pipe = output = mesh_output = None
    try:
        # Without this check a missing upload only fails later, at
        # `.resize`, with an opaque attribute error.
        if image is None:
            return gr.update(value="Please provide an image"), None, None

        if not validate_token(token):
            return gr.update(value="Invalid Hugging Face token"), None, None

        print("Starting image to 3D generation")
        progress(0.1, desc="Loading model...")

        pipe = ShapEImg2ImgPipeline.from_pretrained(
            "openai/shap-e-img2img",
            torch_dtype=torch.float32,
            token=token,
            revision="main",
            low_cpu_mem_usage=True
        )

        os.makedirs("outputs", exist_ok=True)

        # Nanosecond resolution keeps two requests that arrive within the same
        # second from overwriting each other's files.
        timestamp = time.time_ns()
        base_filename = f"outputs/image_to_3d_{timestamp}"

        try:
            progress(0.3, desc="Preparing image...")
            if isinstance(image, str):
                image = Image.open(image)
            elif isinstance(image, np.ndarray):
                image = Image.fromarray(image)

            # Normalise to 3-channel RGB (drops alpha/palette modes) before
            # downscaling to the fixed working size.
            image = image.convert("RGB").resize((128, 128))

            scale = min(guidance_scale, 10.0)
            # One seed for both runs: re-creating the generator with the same
            # seed makes the mesh run reproduce the latents of the preview run.
            seed = int.from_bytes(os.urandom(4), "little")

            progress(0.5, desc="Creating 3D model...")
            with torch.no_grad():
                output = pipe(
                    image=image,
                    guidance_scale=scale,
                    num_inference_steps=16,
                    generator=torch.Generator().manual_seed(seed)
                )

            progress(0.7, desc="Creating GIF...")
            # `images` holds one list of frames per input; the GIF exporter
            # expects the frame list itself.
            gif_path = export_to_gif(output.images[0], f"{base_filename}.gif")

            progress(0.8, desc="Creating 3D mesh...")
            with torch.no_grad():
                mesh_output = pipe(
                    image=image,
                    guidance_scale=scale,
                    num_inference_steps=16,
                    generator=torch.Generator().manual_seed(seed),
                    output_type="mesh"
                )
            # With output_type="mesh" the decoded meshes are returned in
            # `.images` (there is no `.meshes` attribute on the output).
            mesh = mesh_output.images[0]

            progress(0.9, desc="Saving files...")
            output_path = f"{base_filename}.{export_format}"
            if export_format == "obj":
                export_to_obj(mesh, output_path)
            else:
                # diffusers only writes PLY/OBJ; other formats (e.g. GLB) are
                # produced by converting an intermediate PLY with trimesh,
                # which is pinned in this project's requirements.txt.
                import trimesh

                ply_path = export_to_ply(mesh, f"{base_filename}.ply")
                trimesh.load(ply_path).export(output_path, file_type=export_format)

            print(f"Generation completed: {output_path}")
            progress(1.0, desc="Completed!")
            return gr.update(value="Generation successful!"), gr.update(value=gif_path), gr.update(value=output_path)

        except Exception as model_error:
            error_msg = f"Model execution error: {str(model_error)}"
            print(error_msg)
            return gr.update(value=error_msg), None, None

    except Exception as e:
        error_msg = f"General error: {str(e)}"
        print(error_msg)
        return gr.update(value=error_msg), None, None

    finally:
        # Drop the pipeline and its outputs on every path (success or error)
        # so the model weights do not stay resident between requests.
        pipe = output = mesh_output = None
        gc.collect()
159
+
160
# Gradio UI. Two tabs (text input and image input) share the same layout:
# inputs and the "Generate" button on the left, status / GIF preview /
# downloadable model file on the right.
with gr.Blocks(theme=gr.themes.Soft()) as interface:
    gr.Markdown("# SORA-3D - Text/Image to 3D Model Generator")
    gr.Markdown("Create 3D models from text or image input. You need a Hugging Face token to use this app.")
    gr.Markdown("""
    > **Important Notes**:
    > - Processing time may be longer on CPU
    > - Keep guidance scale under 10 for faster results
    > - Number of steps is fixed at 16
    > - Image size is optimized for quality/speed
    """)

    # --- Tab 1: text prompt -> 3D model ---
    with gr.Tab("Text → 3D"):
        with gr.Row():
            with gr.Column():
                text_input = gr.Textbox(label="Enter description for 3D model", scale=2)
                text_token = gr.Textbox(label="Hugging Face Token", type="password", scale=2)
                with gr.Row():
                    text_guidance = gr.Slider(
                        minimum=1,
                        maximum=10,
                        value=7,
                        label="Guidance Scale",
                        scale=1,
                    )
                    text_format = gr.Radio(
                        ["obj", "glb"],
                        label="Export Format",
                        value="obj",
                        scale=1,
                    )
                text_button = gr.Button("Generate", variant="primary")

            with gr.Column():
                text_status = gr.Textbox(label="Status", interactive=False)
                text_preview = gr.Image(label="3D Preview (GIF)", interactive=False)
                text_file = gr.File(label="3D Model File")

    # --- Tab 2: image -> 3D model ---
    with gr.Tab("Image → 3D"):
        with gr.Row():
            with gr.Column():
                image_input = gr.Image(label="Image to convert to 3D", type="pil", scale=2)
                image_token = gr.Textbox(label="Hugging Face Token", type="password", scale=2)
                with gr.Row():
                    image_guidance = gr.Slider(
                        minimum=1,
                        maximum=10,
                        value=7,
                        label="Guidance Scale",
                        scale=1,
                    )
                    image_format = gr.Radio(
                        ["obj", "glb"],
                        label="Export Format",
                        value="obj",
                        scale=1,
                    )
                image_button = gr.Button("Generate", variant="primary")

            with gr.Column():
                image_status = gr.Textbox(label="Status", interactive=False)
                image_preview = gr.Image(label="3D Preview (GIF)", interactive=False)
                image_file = gr.File(label="3D Model File")

    # Wire each "Generate" button to its handler. The handlers return
    # (status, preview, model_file), matching the order of `outputs`.
    text_button.click(
        fn=generate_3d_from_text,
        inputs=[text_input, text_token, text_guidance, text_format],
        outputs=[text_status, text_preview, text_file],
    )

    image_button.click(
        fn=generate_3d_from_image,
        inputs=[image_input, image_token, image_guidance, image_format],
        outputs=[image_status, image_preview, image_file],
    )

# Only start the web server when executed as a script, not on import.
if __name__ == "__main__":
    interface.launch()
requirements.txt ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ --extra-index-url https://download.pytorch.org/whl/cu121
2
+ --find-links https://nvidia-kaolin.s3.us-east-2.amazonaws.com/torch-2.4.0_cu121.html
3
+
4
+ torch==2.4.0
5
+ torchvision==0.19.0
6
+ pillow==10.4.0
7
+ imageio==2.36.1
8
+ imageio-ffmpeg==0.5.1
9
+ tqdm==4.67.1
10
+ easydict==1.13
11
+ opencv-python-headless==4.10.0.84
12
+ scipy==1.14.1
13
+ rembg==2.0.60
14
+ onnxruntime==1.20.1
15
+ trimesh==4.5.3
16
+ xatlas==0.0.9
17
+ pyvista==0.44.2
18
+ pymeshfix==0.17.0
19
+ igraph==0.11.8
20
+ git+https://github.com/EasternJournalist/utils3d.git@9a4eb15e4021b67b12c460c7057d642626897ec8
21
+ xformers==0.0.27.post2
22
+ kaolin==0.17.0
23
+ spconv-cu120==2.3.6
24
+
25
+ gradio_litmodel3d==0.0.1
26
+ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.0.post2/flash_attn-2.7.0.post2+cu12torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
27
+ https://huggingface.co/spaces/JeffreyXiang/TRELLIS/resolve/main/wheels/diff_gaussian_rasterization-0.0.0-cp310-cp310-linux_x86_64.whl?download=true
28
+ https://huggingface.co/spaces/JeffreyXiang/TRELLIS/resolve/main/wheels/nvdiffrast-0.3.3-cp310-cp310-linux_x86_64.whl?download=true
29
+ transformers>=4.30.0
30
+ accelerate>=0.20.0
31
+ diffusers>=0.24.0
32
+ invisible_watermark
33
+ xformers
34
+ sentencepiece
35
+ peft
36
+ safetensors>=0.4.0
37
+ gradio==5.14.0
38
+ huggingface-hub>=0.19.0
39
+ sacremoses
40
+ numpy>=1.24.0
41
+ uvicorn>=0.14.0
42
+ spaces