geyik1 committed
Commit ed2eaae · verified · 1 Parent(s): e5f4084

Upload 2 files

Files changed (2)
  1. app.py +212 -0
  2. requirements.txt +42 -0
app.py ADDED
@@ -0,0 +1,212 @@
import torch
import gradio as gr
from diffusers import ShapEPipeline, ShapEImg2ImgPipeline
from diffusers.utils import export_to_gif
import os
from huggingface_hub import HfApi, login
from PIL import Image
import numpy as np
import gc

# Force CPU usage
device = "cpu"
torch.set_num_threads(4)
print(f"Using device: {device}")

def validate_token(token):
    try:
        login(token=token)
        return True
    except Exception as e:
        print(f"Token validation error: {str(e)}")
        return False

def generate_3d_from_text(prompt, token, guidance_scale=7.0, export_format="obj", progress=gr.Progress()):
    try:
        if not validate_token(token):
            return "Invalid Hugging Face token", None, None

        print(f"Starting generation: {prompt}")
        progress(0.1, "Loading model...")

        pipe = ShapEPipeline.from_pretrained(
            "openai/shap-e",
            torch_dtype=torch.float32,
            token=token,
            revision="main",
            low_cpu_mem_usage=True
        )

        os.makedirs("outputs", exist_ok=True)
        safe_prompt = "".join(x for x in prompt if x.isalnum() or x in (" ", "-", "_"))
        base_filename = f"outputs/{safe_prompt}"

        try:
            progress(0.3, "Creating 3D model...")
            with torch.no_grad():
                output = pipe(
                    prompt,
                    guidance_scale=min(guidance_scale, 10.0),
                    num_inference_steps=16
                )

            progress(0.5, "Creating GIF...")
            gif_path = export_to_gif(output.images, f"{base_filename}.gif")

            progress(0.7, "Creating 3D mesh...")
            mesh_output = pipe(
                prompt,
                guidance_scale=min(guidance_scale, 10.0),
                num_inference_steps=16,
                output_type="mesh"
            )

            progress(0.9, "Saving files...")
            output_path = f"{base_filename}.{export_format}"
            mesh_output.meshes[0].export(output_path)

            del pipe
            del output
            del mesh_output
            gc.collect()

            print(f"Generation completed: {output_path}")
            progress(1.0, "Completed!")
            return "Generation successful!", gif_path, output_path

        except Exception as model_error:
            error_msg = f"Model execution error: {str(model_error)}"
            print(error_msg)
            return error_msg, None, None

    except Exception as e:
        error_msg = f"General error: {str(e)}"
        print(error_msg)
        return error_msg, None, None

def generate_3d_from_image(image, token, guidance_scale=7.0, export_format="obj", progress=gr.Progress()):
    try:
        if not validate_token(token):
            return "Invalid Hugging Face token", None, None

        print("Starting image to 3D generation")
        progress(0.1, "Loading model...")

        pipe = ShapEImg2ImgPipeline.from_pretrained(
            "openai/shap-e-img2img",
            torch_dtype=torch.float32,
            token=token,
            revision="main",
            low_cpu_mem_usage=True
        )

        os.makedirs("outputs", exist_ok=True)

        import time
        timestamp = int(time.time())
        base_filename = f"outputs/image_to_3d_{timestamp}"

        try:
            progress(0.3, "Preparing image...")
            if isinstance(image, str):
                image = Image.open(image)
            elif isinstance(image, np.ndarray):
                image = Image.fromarray(image)

            image = image.resize((128, 128))

            progress(0.5, "Creating 3D model...")
            with torch.no_grad():
                output = pipe(
                    image=image,
                    guidance_scale=min(guidance_scale, 10.0),
                    num_inference_steps=16
                )

            progress(0.7, "Creating GIF...")
            gif_path = export_to_gif(output.images, f"{base_filename}.gif")

            progress(0.8, "Creating 3D mesh...")
            mesh_output = pipe(
                image=image,
                guidance_scale=min(guidance_scale, 10.0),
                num_inference_steps=16,
                output_type="mesh"
            )

            progress(0.9, "Saving files...")
            output_path = f"{base_filename}.{export_format}"
            mesh_output.meshes[0].export(output_path)

            del pipe
            del output
            del mesh_output
            gc.collect()

            print(f"Generation completed: {output_path}")
            progress(1.0, "Completed!")
            return "Generation successful!", gif_path, output_path

        except Exception as model_error:
            error_msg = f"Model execution error: {str(model_error)}"
            print(error_msg)
            return error_msg, None, None

    except Exception as e:
        error_msg = f"General error: {str(e)}"
        print(error_msg)
        return error_msg, None, None

with gr.Blocks() as interface:
    gr.Markdown("# SORA-3D - Text/Image to 3D Model Generator")
    gr.Markdown("Create 3D models from text or image input. You need a Hugging Face token to use this app.")
    gr.Markdown("""
    > **Important Notes**:
    > - Processing time may be longer on CPU
    > - Keep guidance scale under 10 for faster results
    > - Number of steps is fixed at 16
    > - Image size is optimized for quality/speed
    """)

    with gr.Tab("Text → 3D"):
        with gr.Row():
            with gr.Column():
                text_input = gr.Textbox(label="Enter description for 3D model")
                text_token = gr.Textbox(label="Hugging Face Token", type="password")
                text_guidance = gr.Slider(minimum=1, maximum=10, value=7, label="Guidance Scale")
                text_format = gr.Radio(["obj", "glb"], label="Export Format", value="obj")
                text_button = gr.Button("Generate")

            with gr.Column():
                text_status = gr.Textbox(label="Status")
                text_preview = gr.Image(label="3D Preview (GIF)")
                text_file = gr.File(label="3D Model File")

    with gr.Tab("Image → 3D"):
        with gr.Row():
            with gr.Column():
                image_input = gr.Image(label="Image to convert to 3D", type="pil")
                image_token = gr.Textbox(label="Hugging Face Token", type="password")
                image_guidance = gr.Slider(minimum=1, maximum=10, value=7, label="Guidance Scale")
                image_format = gr.Radio(["obj", "glb"], label="Export Format", value="obj")
                image_button = gr.Button("Generate")

            with gr.Column():
                image_status = gr.Textbox(label="Status")
                image_preview = gr.Image(label="3D Preview (GIF)")
                image_file = gr.File(label="3D Model File")

    text_button.click(
        generate_3d_from_text,
        inputs=[text_input, text_token, text_guidance, text_format],
        outputs=[text_status, text_preview, text_file]
    )

    image_button.click(
        generate_3d_from_image,
        inputs=[image_input, image_token, image_guidance, image_format],
        outputs=[image_status, image_preview, image_file]
    )

if __name__ == "__main__":
    interface.launch()
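
A note on the mesh export above: the committed code calls `mesh_output.meshes[0].export(output_path)`, whereas the diffusers Shap-E documentation returns meshes through the pipeline output's `images` field and writes them out with the `export_to_ply` / `export_to_obj` helpers in `diffusers.utils` (the GIF example in the same docs likewise indexes `images[0]`). Below is a minimal sketch of that documented pattern; it is not part of this commit, and the prompt, file names, and trimesh-based `.glb` conversion are illustrative choices.

# Sketch (assumption, not part of the upload): Shap-E mesh export via diffusers.utils helpers.
import torch
import trimesh
from diffusers import ShapEPipeline
from diffusers.utils import export_to_ply

pipe = ShapEPipeline.from_pretrained("openai/shap-e", torch_dtype=torch.float32)

# With output_type="mesh", the pipeline output's .images holds the decoded meshes.
mesh = pipe(
    "a birthday cupcake",        # illustrative prompt
    guidance_scale=7.0,
    num_inference_steps=16,
    output_type="mesh",
).images[0]

ply_path = export_to_ply(mesh, "cupcake.ply")   # writes a .ply file and returns its path
trimesh.load(ply_path).export("cupcake.glb")    # optional .glb conversion (trimesh is pinned in requirements.txt)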
requirements.txt ADDED
@@ -0,0 +1,42 @@
--extra-index-url https://download.pytorch.org/whl/cu121
--find-links https://nvidia-kaolin.s3.us-east-2.amazonaws.com/torch-2.4.0_cu121.html

torch==2.4.0
torchvision==0.19.0
pillow==10.4.0
imageio==2.36.1
imageio-ffmpeg==0.5.1
tqdm==4.67.1
easydict==1.13
opencv-python-headless==4.10.0.84
scipy==1.14.1
rembg==2.0.60
onnxruntime==1.20.1
trimesh==4.5.3
xatlas==0.0.9
pyvista==0.44.2
pymeshfix==0.17.0
igraph==0.11.8
git+https://github.com/EasternJournalist/utils3d.git@9a4eb15e4021b67b12c460c7057d642626897ec8
xformers==0.0.27.post2
kaolin==0.17.0
spconv-cu120==2.3.6

gradio_litmodel3d==0.0.1
https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.0.post2/flash_attn-2.7.0.post2+cu12torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
https://huggingface.co/spaces/JeffreyXiang/TRELLIS/resolve/main/wheels/diff_gaussian_rasterization-0.0.0-cp310-cp310-linux_x86_64.whl?download=true
https://huggingface.co/spaces/JeffreyXiang/TRELLIS/resolve/main/wheels/nvdiffrast-0.3.3-cp310-cp310-linux_x86_64.whl?download=true
transformers>=4.30.0
accelerate>=0.20.0
diffusers>=0.24.0
invisible_watermark
xformers
sentencepiece
peft
safetensors>=0.4.0
gradio==4.44.1
huggingface-hub>=0.19.0
sacremoses
numpy>=1.24.0
uvicorn>=0.14.0
spaces
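
A note on the pins above: app.py forces CPU and only imports torch, gradio, diffusers, huggingface_hub, Pillow, and NumPy, while several entries (the cu121 extra index, kaolin, spconv-cu120, flash-attn, the TRELLIS wheels, and xformers, which is also listed twice) target CUDA builds that the committed app never touches. A leaner CPU-only set that would plausibly cover this commit is sketched below; it is an untested assumption, not part of the upload.

# Minimal CPU-only sketch (assumption, not the committed requirements.txt)
torch==2.4.0
diffusers>=0.24.0
transformers>=4.30.0
accelerate>=0.20.0
safetensors>=0.4.0
huggingface-hub>=0.19.0
gradio==4.44.1
pillow==10.4.0
numpy>=1.24.0
trimesh==4.5.3   # only needed if a .ply → .glb conversion step is added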